Repository: pkardas/notes Branch: master Commit: 7b0d56be00b4 Files: 180 Total size: 904.1 KB Directory structure: gitextract_z_5iswqa/ ├── .gitignore ├── README.md ├── books/ │ ├── architecture-hard-parts.md │ ├── build.md │ ├── clean-agile.md │ ├── clean-code.md │ ├── coaching-agile-teams.md │ ├── code-complete.md │ ├── comic-agile.md │ ├── cracking-coding-interview/ │ │ ├── Dockerfile │ │ ├── docker-compose.yml │ │ ├── notes.md │ │ ├── requirements.txt │ │ └── src/ │ │ ├── ch01_arrays_and_strings/ │ │ │ ├── check_permutation.py │ │ │ ├── is_unique.py │ │ │ ├── one_away.py │ │ │ ├── palindrome_permutation.py │ │ │ ├── rotate_matrix.py │ │ │ ├── string_compression.py │ │ │ ├── string_rotation.py │ │ │ ├── urlify.py │ │ │ └── zero_matrix.py │ │ └── ch02_linked_lists/ │ │ ├── delete_middle_node.py │ │ ├── intersection.py │ │ ├── linked_list.py │ │ ├── loop_detection.py │ │ ├── palindrome.py │ │ ├── partition.py │ │ ├── remove_dups.py │ │ ├── return_kth_to_last.py │ │ └── sum_lists.py │ ├── ddd.md │ ├── ddia.md │ ├── docker-deep-dive.md │ ├── elixir.md │ ├── fundamentals-of-architecture.md │ ├── go/ │ │ ├── ch01/ │ │ │ ├── Makefile │ │ │ └── hello.go │ │ ├── ch02/ │ │ │ ├── const.go │ │ │ └── unicode.go │ │ ├── ch03/ │ │ │ └── types.go │ │ ├── ch04/ │ │ │ ├── case.go │ │ │ ├── for.go │ │ │ └── if.go │ │ ├── ch05/ │ │ │ ├── anonymous.go │ │ │ ├── deferExample.go │ │ │ ├── functionAsParam.go │ │ │ ├── functions.go │ │ │ ├── functionsAreValues.go │ │ │ └── returnFunction.go │ │ ├── ch06/ │ │ │ └── pointers.go │ │ ├── ch07/ │ │ │ ├── counter.go │ │ │ ├── dependencyInjection.go │ │ │ ├── embedding.go │ │ │ ├── intTree.go │ │ │ ├── interfaces.go │ │ │ ├── iota.go │ │ │ └── types.go │ │ ├── ch08/ │ │ │ ├── customErrors.go │ │ │ ├── errors.go │ │ │ ├── panic.go │ │ │ ├── recover.go │ │ │ ├── sentinel.go │ │ │ └── wrappingErrors.go │ │ ├── ch09/ │ │ │ ├── formatter/ │ │ │ │ └── formatter.go │ │ │ ├── main.go │ │ │ └── math/ │ │ │ └── math.go │ │ ├── ch10/ │ │ │ ├── deadlock.go │ │ │ ├── deadlockSolution.go │ │ │ └── goroutinesExample.go │ │ └── notes.md │ ├── hands-on-ml.md │ ├── head-first-design-patterns/ │ │ ├── ch_01_strategy.py │ │ ├── ch_02_observer.py │ │ ├── ch_03_decorator.py │ │ ├── ch_04_factory.py │ │ ├── ch_05_singleton.py │ │ ├── ch_06_command.py │ │ ├── ch_07_adapter.py │ │ ├── ch_07_facade.py │ │ ├── ch_08_template_method.py │ │ ├── ch_09_composite.py │ │ ├── ch_09_iterator.py │ │ ├── ch_10_state.py │ │ ├── ch_11_virtual_proxy.py │ │ └── notes.md │ ├── kubernetes-book.md │ ├── kubernetes-in-action.md │ ├── nlp-book.md │ ├── peopleware.md │ ├── pragmatic-programmer.md │ ├── pytest/ │ │ ├── .coveragerc │ │ ├── Dockerfile │ │ ├── docker-compose.yml │ │ ├── notes.md │ │ ├── requirements.txt │ │ ├── setup.cfg │ │ ├── src/ │ │ │ ├── __init__.py │ │ │ ├── api.py │ │ │ ├── cli.py │ │ │ └── db.py │ │ └── tests/ │ │ ├── ch_02/ │ │ │ ├── test_card.py │ │ │ ├── test_classes.py │ │ │ ├── test_exceptions.py │ │ │ └── test_helper.py │ │ ├── ch_03/ │ │ │ ├── conftest.py │ │ │ ├── test_autouse.py │ │ │ ├── test_count.py │ │ │ ├── test_count_initial.py │ │ │ ├── test_fixtures.py │ │ │ ├── test_rename_fixture.py │ │ │ └── test_some.py │ │ ├── ch_04/ │ │ │ ├── conftest.py │ │ │ ├── test_config.py │ │ │ ├── test_tmp.py │ │ │ └── test_version.py │ │ ├── ch_05/ │ │ │ └── test_parametrize.py │ │ ├── ch_06/ │ │ │ ├── pytest.ini │ │ │ ├── test_builtin.py │ │ │ ├── test_custom.py │ │ │ └── text_combination.py │ │ ├── ch_12/ │ │ │ ├── hello.py │ │ │ └── test_hello.py │ │ └── ch_15/ │ │ ├── conftest.py │ │ ├── 
pytest.ini │ │ └── test_slow.py │ ├── python-architecture-patterns/ │ │ ├── Dockerfile │ │ ├── Makefile │ │ ├── docker-compose.yml │ │ ├── notes.md │ │ ├── requirements.txt │ │ ├── setup.cfg │ │ ├── src/ │ │ │ ├── __init__.py │ │ │ ├── adapters/ │ │ │ │ ├── __init__.py │ │ │ │ ├── notifications.py │ │ │ │ ├── orm.py │ │ │ │ ├── redis_publisher.py │ │ │ │ └── repository.py │ │ │ ├── app.py │ │ │ ├── bootstrap.py │ │ │ ├── config.py │ │ │ ├── domain/ │ │ │ │ ├── __init__.py │ │ │ │ ├── commands.py │ │ │ │ ├── events.py │ │ │ │ └── model.py │ │ │ ├── redis_consumer.py │ │ │ ├── service_layer/ │ │ │ │ ├── __init__.py │ │ │ │ ├── handlers.py │ │ │ │ ├── message_bus.py │ │ │ │ └── unit_of_work.py │ │ │ └── views.py │ │ └── tests/ │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── e2e/ │ │ │ ├── __init__.py │ │ │ ├── api_client.py │ │ │ ├── redis_client.py │ │ │ ├── test_app.py │ │ │ └── test_external_events.py │ │ ├── integration/ │ │ │ ├── __init__.py │ │ │ ├── test_uow.py │ │ │ └── test_views.py │ │ └── unit/ │ │ ├── __init__.py │ │ ├── test_batches.py │ │ ├── test_handlers.py │ │ └── test_product.py │ ├── refactoring.md │ ├── release-it.md │ ├── system-design-interview.md │ ├── tidy-first.md │ └── understanding-distributed-systems.md ├── case-studies/ │ └── reddit.md ├── conferences/ │ ├── aws-innovate-ai-ml-21.md │ ├── brown-bags.md │ └── pycon-2022.md ├── courses/ │ └── fast-ai.md ├── patterns/ │ ├── abbreviations.md │ └── architecture.md └── teaching/ ├── python-intermediate/ │ └── README.md └── python-intro/ ├── README.md └── notebook.ipynb ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ .DS_Store .AppleDouble .LSOverride .idea .ipynb_checkpoints */.pytest_cache/ git-user.sh /excluded_resources/* ================================================ FILE: README.md ================================================ # 👉👉👉 Visit [musicat.fm](https://musicat.fm) 😻 You can connect Spotify and Apple Music to it to discover many cool statistics about your taste! (I'm the author 🤩) --- ## Notes ### Books 👀 In progress: - [System design interview](books/system-design-interview.md) #### ✅ Finished: - Code: - [Clean Code: A Handbook of Agile Software Craftsmanship](books/clean-code.md) - [Learning Go: An Idiomatic Approach to Real-World Go Programming](books/go/notes.md) - [Python Testing with Pytest](books/pytest/notes.md) - [Refactoring: Improving the Design of Existing Code](books/refactoring.md) - [Tidy first?](books/tidy-first.md) - Architecture: - [Architecture Patterns with Python](books/python-architecture-patterns/notes.md) - [Designing Data-Intensive Applications: The Big Ideas Behind Reliable, Scalable, and Maintainable Systems](books/ddia.md) - [Head First Design Patterns: Building Extensible and Maintainable Object-Oriented Software](books/head-first-design-patterns/notes.md) - [Release It! 
Design and Deploy Production-Ready Software](books/release-it.md) - [Fundamentals of Software Architecture](books/fundamentals-of-architecture.md) - Process: - [Clean Agile: Back to Basics](books/clean-agile.md) - [Domain-Driven Design: Tackling Complexity in the Heart of Software](books/ddd.md) - [Peopleware: Productive Projects and Teams](books/peopleware.md) - [The Pragmatic Programmer](books/pragmatic-programmer.md) - [Comic Agilé](books/comic-agile.md) - DevOps: - [The Kubernetes Book](books/kubernetes-book.md) - Product: - :eyes: - ML: - [Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition](books/nlp-book.md) #### ☑️ Finished partially: - [Code Complete: A Practical Handbook of Software Construction](books/code-complete.md) - [Cracking the Coding Interview](books/cracking-coding-interview/notes.md) - [Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow: Concepts, Tools, and Techniques to Build Intelligent Systems](books/hands-on-ml.md) - [Build](books/build.md) - [Coaching Agile Teams](books/coaching-agile-teams.md) #### ⏳ Queue: - [Docker Deep Dive](books/docker-deep-dive.md) - [Software Architecture: The Hard Parts](books/architecture-hard-parts.md) - [Understanding Distributed Systems](books/understanding-distributed-systems.md) - [Kubernetes in Action](books/kubernetes-in-action.md) - [Elixir in Action](books/elixir.md) ### Case Studies - [Reddit](case-studies/reddit.md) ### Conferences - [PyCon 2022](conferences/pycon-2022.md) - [AWS Innovate: AI/ML Edition 2021](conferences/aws-innovate-ai-ml-21.md) - [Brown Bags](conferences/brown-bags.md) ### Patterns - [Abbreviations](patterns/abbreviations.md) - [Architecture](patterns/architecture.md) ### Teaching - [Introduction to Programming: Python for beginners](teaching/python-intro) - [Python Intermediate](teaching/python-intermediate) ### Courses - [Course @ FastAI](courses/fast-ai.md) ================================================ FILE: books/architecture-hard-parts.md ================================================ [go back](https://github.com/pkardas/learning) # Software Architecture: The Hard Parts: Modern Tradeoff Analysis for Distributed Architectures Book by Pramod Sadalage, Neal Ford, Mark Richards, Zhamak Dehghani ================================================ FILE: books/build.md ================================================ [go back](https://github.com/pkardas/learning) # Build Book by Tony Fadell - [1.1 Adulthood](#11-adulthood) - [1.2 Get a job](#12-get-a-job) - [1.3 Heroes](#13-heroes) - [1.4 Don't (only) look down](#14-dont-only-look-down) - [2.1 Just managing](#21-just-managing) - [2.2 Data versus opinion](#22-data-versus-opinion) - [2.3 Assholes](#23-assholes) - [2.4 I quit](#24-i-quit) - [3.1 Make the intangible tangible](#31-make-the-intangible-tangible) - [3.2 Why storytelling](#32-why-storytelling) - [3.3 Evolution versus disruption versus execution](#33-evolution-versus-disruption-versus-execution) - [3.4 Your first adventure - and your second](#34-your-first-adventure---and-your-second) ## 1.1 Adulthood When you are looking at the array of potential careers before you, the correct place to start is "What do I want to learn?" - NOT: How much money do I want to make? - NOT: What title do I want to have? - NOT: What company has enough name recognition? Early adulthood is about watching your dreams go up in flames and learning as much as you can from the ashes. 
Go where you can grow - the people, the mission, and the opportunity are all that matter.

> The only failure in your twenties is inaction. The rest is trial and error.

Humans learn through productive struggle, by trying it themselves and screwing up and doing it differently next time. You have to push yourself up the mountain, even if it means you might fall off a cliff.

## 1.2 Get a job

If you are going to throw your time, energy, and youth at a company, try to join one that's not just making a better mousetrap. Find a business that's starting a revolution:

- it's creating a product that's wholly new or combines existing technology in a novel way that the competition can't make or even understand
- this product solves a problem - a real pain point - that a lot of customers experience daily
- the novel technology can deliver on the company vision
- leadership is not dogmatic about what the solution looks like and is willing to adapt to their customers' needs
- it's thinking about a problem or a customer need in a way you've never heard before, but makes perfect sense once you hear it

Cool technology isn't enough, a great team isn't enough, plenty of funding isn't enough. You have to time your product right. The world has to be ready to want it. If you're not solving a real problem, you can't start a revolution.

Seemingly impossible problems that a decade ago would have cost billions to solve, requiring massive investments from giant firms, can now be figured out with a smartphone app, a small sensor, and the internet.

If you are passionate about something - something that could be solving a huge problem one day - then stick with it. Because one day, if you are truly solving a real issue, when the world is ready to want it, you will already be there.

You don't have to be an executive right away, you don't have to get a job at the most amazing, world-changing company out of college, but you should have a goal.

## 1.3 Heroes

The only thing that can make a job truly amazing or a complete waste of time is the people.

You always have something to offer if you are curious and engaged. You can always trade and barter good ideas; you can always be kind and find a way to help.

Try to get into a small company, the sweet spot is a business of 30-100 people building something worth building. You could go to Google, Apple, Facebook, or some other giant company, but it will be hard to maneuver yourself to work closely with the rock stars. Smaller companies still have specialization, but usually without silos. And they have a different energy. The whole company will be focused on working together to make one precious idea become reality. Anything unnecessary is shunned - red tape and politics are typically nonexistent. Being in that lifeboat with people you deeply respect is a joy. It is the best time you can have at work. It might be the best time you can have.

## 1.4 Don't (only) look down

IC - individual contributor - a person who doesn't manage others. As an IC, you need to occasionally do 2 things:

- look up - look beyond the next deadline or project, be sure the mission still makes sense to you and that the path to reach it seems achievable
- look around - get out of your comfort zone and away from the immediate team you are on, talk to the other functions in your company to understand their perspectives, needs, and concerns

Don't think doing the work just means locking yourself in a room - a huge part of it is walking with your team. The work is reaching your destination together.
Or finding a new destination and bringing your team with you.

## 2.1 Just managing

6 things you should know before becoming a manager:

- You don't have to be a manager to be successful - many people wrongly assume that the only path to more money and stature is managing a team. There are alternatives that will enable you to get a similar paycheck.
- Remember that once you become a manager, you will stop doing the thing that made you successful in the first place - your job will be communication, communication, communication, recruiting, hiring, firing, setting budgets, reviews, one-to-one meetings, setting goals, keeping people on track, resolving conflicts, mentoring, ...
- Becoming a manager is a discipline - management is a learned skill, not a talent.
- Being exacting and expecting great work is not micromanagement - your job is to make sure the team produces high-quality work, it only turns into micromanagement when you dictate the step-by-step process.
- Honesty is more important than style - you can be successful with any style as long as you never shy away from respectfully telling people the uncomfortable, hard truth that needs to be said.
- Don't worry that your team will outshine you - in fact, it's your goal. You should always be training someone on your team to do your job; the better they are, the easier it is for you to move up and even start managing managers.

When you are a manager, you are no longer just responsible for the work. You are responsible for human beings.

A star individual contributor is incredibly valuable. Valuable enough that many companies will pay them just as much as they'd pay a manager. A truly great IC will be a leader in their chosen function and also become an informal cultural leader, someone who people across the company will seek out for advice and mentorship.

Examining the product in detail and caring deeply about the quality of what your team is producing is not micromanagement. That's exactly what you should be doing. Steve Jobs would bring out a jeweler's loupe and look at individual pixels on a screen to make sure the user interface graphics were properly drawn. As a manager, you should be focused on making sure the team is producing the best possible product.

It is very easy to turn 1:1s into friendly chats that go nowhere, so a clear meeting agenda can be beneficial.

If you are a manager - congrats, you're now a parent. Not because you should treat your employees like children, but because it's now your responsibility to help them work through failure and find success.

## 2.2 Data versus opinion

Data-driven decisions - you can acquire, study, and debate facts - relatively easy to make. Opinion-driven - follow your gut and your vision - always hard and always questioned.

Make decisions, not everyone has to agree - it happens when one person has to make the final call. This isn't a democracy, nor a dictatorship - you can't give orders without explaining yourself.

Storytelling is how you get people to take a leap of faith to do something new. Creating a believable narrative that everyone can latch on to is critical to moving forward and making hard choices. It's all that marketing comes down to. You are selling - your vision, your gut, your opinion.

> It's not data or intuition, it's data and intuition.

## 2.3 Assholes

Up to 12 percent of corporate senior leadership exhibit psychopathic traits. There are different assholes:

- Political assholes - people who master the art of corporate politics, but then do nothing but take credit for everyone else's work.
These assholes usually build a coalition of budding assholes around them.
- Controlling assholes - micromanagers who systematically strangle the creativity and joy out of their team. They never give people credit for their work, never praise it, and often steal it.
- Asshole assholes - they suck at work and everything else; mean, jealous, insecure jerks. They cannot deliver, are deeply unproductive, so they do everything possible to deflect attention away from themselves. They are generally out the door pretty quickly.
- Mission-driven "assholes" - crazy passionate - they are neither easygoing nor easy to work with. Unlike true assholes, they care.

Pushing for greatness doesn't make you an asshole. Not tolerating mediocrity doesn't make you an asshole.

You need to understand their motivations. Controlling assholes won't listen. They will never admit they screwed up. Things you can do when faced with a controlling asshole:

- kill 'em with kindness
- ignore them
- try to get around them
- quit

Most people aren't assholes. And even if they are, they are also human. So don't walk into a job trying to get anyone fired. Start with kindness. Try to make peace. Assume the best.

## 2.4 I quit

Sometimes you need to quit. Here is how you know:

- You are no longer passionate about the mission - every hour at your desk feels like an eternity
- You have tried everything - the company is letting you down

Once you do decide to quit, make sure you leave in the right way - try to finish as much as possible, find a natural breakpoint in your project.

Hating your job is never worth whatever raise, title, or perks they throw at you to stay. The threat of leaving may be enough to push your company to get serious and make whatever change you are asking for. But it might not. Quitting should not be a negotiating tactic - it should be the very last card you play.

Good things take time, big things take longer. If you flit from project to project, company to company, you will never have the vital experience of starting and finishing something meaningful.

## 3.1 Make the intangible tangible

Don't just make a prototype of your product and think you're done. Prototype as much of the full customer experience as possible. Your product isn't only your product. It's the whole user experience.

The customer journey and touchpoints:

- awareness (PR, search, social media, ads)
- education (website, email, blog, trial/demo)
- acquisition (partners, payment model)
- product (design, UX, performance)
- onboarding (quick guide, account creation, tips, how-to videos)
- usage (reliability, usability, updates, lifespan)
- support (troubleshooting, knowledge base, call center, community)
- loyalty (new product, newsletter, promotions, ratings/reviews)

## 3.2 Why storytelling

Every product should have a story, a narrative that explains why it needs to exist and how it will solve your customer's problems. A good product story:

- it appeals to people's rational and emotional sides
- it takes complicated concepts and makes them simple
- it reminds people of the problem that's being solved
- it focuses on the why

The story of your product, your company, and your vision should drive everything you do.

Virus of doubt: "it is a way to get into people's heads, remind them about a daily frustration, get them annoyed about it all over again. You get them angry about how it works now so they can get excited about a new way of doing things."

A product's story is its design, features, images, videos, quotes from customers, tips from reviewers.
It is the sum of what people see and feel about this thing that you have created.

Why does this thing need to exist? Why does it matter? Why will people need it? Why will they love it?

The longer you work on something, the more the "what" takes over the "why". When you get wrapped up in the "what", you get ahead of people. You think everyone can see what you see. But they don't.

Earn their trust by showing that you really know your stuff or understand their needs. Or offer them something useful, connect with them in a new way, so they feel assured that they're making the right choice with your company.

Appeal to their emotions, connect with something they care about. Their worries, their fears. Every person is different, and everyone will read your story differently.

Analogies can be a useful tool in storytelling. They create a shorthand for complicated concepts.

## 3.3 Evolution versus disruption versus execution

Evolution - a small, incremental step to make something better.
Disruption - a fork on the evolutionary tree - something fundamentally new that changes the status quo, usually by taking a novel or revolutionary approach to an old problem.
Execution - doing what you have promised to do and doing it well.

Your version one product should be disruptive, not evolutionary. But disruption alone will not guarantee success. Continue to evolve, but always seek out new ways to disrupt yourself. Disruption should be important for you personally. If you've truly made something disruptive, your competition probably won't be able to replicate it quickly. Just don't overshoot. Don't try to disrupt everything at once.

As your disruptive product, process, or business model begins to gain steam with customers, your competitors will start to get worried. They'll start paying attention, they will get pissed. When companies get angry they undercut your pricing, try to embarrass you with marketing, use negative press, put in new agreements with sales to lock you out of the business. And they might sue you. If they can't innovate, they litigate. The good news is that a lawsuit means you've officially arrived (you are a real threat, and they know it).

Disruption is an extremely delicate balancing act:

- you focus on making one amazing thing but forget that it has to be part of a single, fluid experience
- you execute beautifully on everything else, but the one thing that would have differentiated your product withers away
- you change too many things too fast and regular people can't recognize or understand what you have made; you can't push people too far outside their mental model, not at first

Challenge yourself, over-deliver, create excellent solutions. If you do it right, one disruption will fuel the next. One revolution will domino another.

## 3.4 Your first adventure - and your second

When releasing V1 you have the following tools to make decisions: Vision, Customer insights, Data. Once you start iterating on an existing product, you will have experience and data, so you can use the same tools but in a different order: Data, Customer insights, Vision.

Locking yourself alone in a room to create a manifesto of your single, luminous vision looks and feels indistinguishable from completely losing your mind. Get at least one person - but preferably a small group - to bounce ideas off of. Sketch your ideas together, then fulfill them together.
================================================
FILE: books/clean-agile.md
================================================
[go back](https://github.com/pkardas/learning)

# Clean Agile: Back to Basics

Book by Robert Cecil Martin

- [Chapter 1: Introduction to Agile](#chapter-1-introduction-to-agile)
- [Chapter 2: The Reasons For Agile](#chapter-2-the-reasons-for-agile)
- [Chapter 3: Business Practices](#chapter-3-business-practices)
- [Chapter 4: Team Practices](#chapter-4-team-practices)
- [Chapter 5: Technical Practices](#chapter-5-technical-practices)
- [Chapter 6: Becoming Agile](#chapter-6-becoming-agile)
- [Chapter 7: Craftsmanship](#chapter-7-craftsmanship)
- [Chapter 8: Conclusion](#chapter-8-conclusion)
- [Afterword](#afterword)

## Chapter 1: Introduction to Agile

The Agile Manifesto was written in February 2001 in Utah by 17 software experts. Once a movement becomes popular, the name of that movement gets blurred through misunderstanding and usurpation.

When did Agile begin? More than 50 000 years ago, when humans first decided to collaborate on a common goal. The idea of choosing small intermediate goals and measuring the progress after each is too intuitive, and too human, to be considered any kind of revolution.

Agile was not the only game in town:

- Scientific Management - top-down, command-and-control approach. Big up-front planning followed by careful detailed implementation. Worked best for projects that suffered a high cost of change and solved very well-defined problems with extremely specific goals.
- Waterfall - logical descendant of Scientific Management. Even though it was not what the author was recommending, it was the concept people took away from his paper. And it dominated the next 3 decades.

It dominated, but it didn't work. How could thoroughly analyzing the problem, carefully designing a solution, and then implementing that design fail so spectacularly over and over again?

The Agile reformation began in the late 1980s. In 1995 a famous paper on Scrum was written.

The Preamble of the Agile Manifesto:

> We are uncovering better ways of developing software by doing it and helping others do it.

The Agile Manifesto:

> **Individuals and interactions** over processes and tools.
> **Working software** over comprehensive documentation.
> **Customer collaboration** over contract negotiation.
> **Responding to change** over following a plan.

The Iron Cross of project management: good, fast, cheap, done - pick any three you like, you will not have the fourth. A good manager drives a project to be good enough, fast enough, cheap enough and done as much as necessary. This is the kind of management that Agile strives to enable. Agile is a _framework_ that helps developers and managers execute this kind of pragmatic project management. However, such management is not done automatically. It is entirely possible to work within the Agile framework and still completely mismanage the project and drive it to failure.

Agile provides data. An Agile development team produces just the kinds of data that managers need in order to make good decisions:

- Velocity - how much the development team has gotten done every week.
- Burn-down chart - shows how many points remain until the next major milestone. Has a slope that predicts when the milestone will probably be reached.

Managers need this data to decide how to set the coefficients on the Iron Cross and drive the project to the best possible outcome.
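A minimal sketch (not from the book) of that projection in Python, the language used elsewhere in this repo - the velocity numbers and the 120-point backlog are made up for illustration:

```python
from statistics import mean


def iterations_to_milestone(remaining_points: int, velocities: list[int]) -> float:
    """Project how many more iterations the milestone needs, assuming
    future velocity resembles the average observed so far."""
    return remaining_points / mean(velocities)


# Illustrative data: points completed in each of the last five iterations.
velocities = [21, 34, 27, 29, 31]

# 120 points remain until the next major milestone; the burn-down slope
# (average velocity, ~28.4 points per iteration) predicts the end date.
print(iterations_to_milestone(120, velocities))  # ~4.2 iterations left
```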
Agile development is first and foremost a feedback-driven approach. Each week, each day, each hour, and even each minute is driven by looking at the results of the previous week, day, hour and minute, and then making the appropriate adjustments.

The Date (deadline) is usually fixed and is not going to change because some developers think they may not be able to make it. At the same time, the requirements are wildly in flux and can never be frozen. This is because the customers don't really know what they want. So the requirements are constantly being re-evaluated and re-thought.

The Waterfall model promised to give us a way to get our arms around this problem:

- The Analysis Phase - no real consensus on just what analysis is; the best definition: "it is what analysts do".
- The Design Phase - is where you split the project up into modules and design interfaces between those modules.
- The Implementation Phase - there is no way to successfully pretend it is done; meanwhile, the requirements are still coming.
- The Death March Phase - customers are angry, stakeholders are angry, the pressure mounts, people quit. Hell.

This can be called Runaway Process Inflation - we are going to do the thing that did not work, and do a lot more of it.

Of course Waterfall was not an absolute disaster. It did not crush every software project into rubble. But it was, and remains, a disastrous way to run a software project.

The Waterfall just makes so much sense. First, we analyze the problem, then we design the solution, and then we implement the design. Simple. Direct. Obvious. And wrong.

An Agile project begins with analysis, but it is an analysis that never ends. Time before the deadline is divided into regular increments called _iterations_ or _sprints_. The size of an iteration is usually one or two weeks. The first iteration (Iteration Zero) is used to generate a short list of features (stories). Iteration Zero is used to set up the development environment, estimate the stories and lay out the initial plan. This process of writing stories, estimating them, planning them and designing never stops. Every iteration will have some analysis and design and implementation in it. In an Agile project, we are always analyzing and estimating.

Software is not a reliably estimable process. We programmers simply do not know how long things will take. There is no way to know how complicated a task is going to be until that task is engaged and finished. After a couple of iterations we get insight into how much time will be needed, based on past iterations. This number averages out at a relatively stable velocity. After four or five iterations, we will have a much better idea when this project will be done.

We practice Agile in order to destroy hope before that hope can kill the project. Hope is the project killer. Hope is what makes a software team mislead managers about their true progress. Hope is a very bad way to manage a software project. And Agile is a way to provide an early and continuous dose of cold, hard reality as a replacement for hope.

Some folks think that Agile is about going fast. It is not. Agile is about knowing, as early as possible, just how screwed we are. The reason we want to know this as early as possible is so that we can manage the situation. Managers manage software projects by gathering data and then making the best decisions they can based on that data. Managers do this by making changes to the scope, the schedule, the staff, and the quality:

- Changing the Schedule - ask stakeholders if we can delay the project. Do this as early as possible.
- Adding Staff - in general, business is simply not willing to change the schedule. When new staff is added, productivity plummets for a few weeks as the new people suck the life out of the old people. Then, hopefully, the new people start to get smart enough to actually contribute. Of course, you need enough time, and enough improvement, to make up for the initial loss.
- Decrease Quality - everyone knows that you can go much faster by producing crap. WRONG. There is no such thing as quick and dirty. Anything dirty is slow. **The only way to go fast, is to go well**. If we want to shorten our schedule, the only option is to _increase_ quality.
- Changing Scope - if the organization is rational, then the stakeholders eventually bow their heads in acceptance and begin to scrutinize the plan. Inevitably the stakeholders will find a feature that we have already implemented and then say "It is a real shame you did that one, we sure do not need it". At the beginning of each iteration, ask the stakeholders which features to implement first.

The 20 000-foot view of Agile:

> Agile is a process wherein a project is subdivided into iterations. The output of each iteration is measured and used
> to continuously evaluate the schedule. Features are implemented in the order of business value so that the most
> valuable things are implemented first. Quality is kept as high as possible. The schedule is primarily managed by
> manipulating scope.

## Chapter 2: The Reasons For Agile

Agile is important because of professionalism and the reasonable expectations from our customers.

- Professionalism - nowadays the cost of software failure is high, therefore we need to increase our professionalism. We are surrounded by computers, and they all need to be programmed - they all need software. Nowadays, virtually nothing of significance can be done without interacting with a software system. Now our actions are putting lives and fortunes at stake.
- Reasonable Expectations - meeting expectations is one of the primary goals of Agile development:
  - we will not ship sh*t - Agile's emphasis on Testing, Refactoring, Simple Design and customer feedback is the obvious remedy for shipping bad code.
  - continuous technical readiness - the system should be technically deployable (solid enough to be deployed) at the end of every iteration.
  - stable productivity - big redesigns are horrifically expensive and seldom get deployed. Instead, developers should continuously keep the architecture, design and code as clean as possible; this keeps their productivity high and prevents the otherwise inevitable spiral into low productivity and redesign.
  - inexpensive adaptability - software: soft (easy to change), ware (product). Software was invented because we wanted a way to quickly and easily change the behavior of our machines. Developers should celebrate change because that is why we are here. Changing requirements is the name of the whole game. Our jobs depend on our ability to accept and engineer changing requirements and to make those changes relatively inexpensive. If a change to the requirements breaks your architecture, then your architecture sucks.
  - continuous improvement - the older a software system is, the better it should be. Unfortunately, it seldom happens - we make things worse with time. The Agile practices of Pairing, TDD, Refactoring, and Simple Design strongly support this expectation.
  - fearless competence - people are afraid of changing bad code: you can break it, and if it breaks it will become yours.
This fear forces you to behave incompetently. Customers, users, and managers expect _fearless competence_. They expect that if you see something wrong or dirty, you will fix it and clean it. They don't expect you to allow problems to fester and grow - they expect you to stay on top of the code, keeping it as clean and clear as possible. How to eliminate that fear? Use TDD.
  - QA should find nothing - the Agile practices support this expectation.
  - test automation - manual tests are always eventually lost. Manual tests are expensive and so are always a target for reduction. Besides, asking humans to do what machines can do is expensive, inefficient, and immoral. Every test that can be feasibly automated must be automated. Manual testing should be limited to those things that cannot be automatically validated and to the creative discipline of Exploratory Testing.
  - we cover for each other - each individual member of a software team makes sure that there is someone who can cover for him if he goes down. It is your responsibility to make sure that one or more of your teammates can cover for you.
  - honest estimates - you should provide estimates based on what you do and do not know. You can estimate in relative terms (task B should take half of the time spent on task A); you can also estimate using ranges.
  - you need to say "no" - when the answer to something is "no", then the answer is really "no". For example, if a solution to a problem cannot be found.
  - continuous aggressive learning - our industry changes quickly. We must be able to change with it. So learn, learn, learn! Learn with or without your company's help.
  - mentoring - the best way to learn is to teach. So when new people join the team, teach them - to teach others is to learn.

Customer Bill of Rights:

- You have the right to an overall plan and to know what can be accomplished when and at what cost.
  - We cannot agree to deliver fixed scopes on hard dates. Either the scopes or the dates must be soft.
- You have the right to get the most possible value out of every iteration.
  - The business has the right to expect that developers will work on the most important things at any given time, and that each iteration will provide them the maximum possible usable business value.
- You have the right to see progress in a running system, proven to work by passing repeatable tests that you specify.
- You have the right to change your mind, to substitute functionality, and to change priorities without paying exorbitant costs.
- You have the right to be informed of schedule and estimate changes, in time to choose how to reduce the scope to meet a required date. You can cancel at any time and be left with a useful working system reflecting investment to date.

Developer Bill of Rights:

- You have the right to know what is needed with clear declarations of priority.
  - Developers are entitled to precision in requirements and in the importance of those requirements. This right applies within the context of an iteration. Outside an iteration, requirements and priorities will shift and change.
- You have the right to produce high-quality work at all times.
  - The business has no right to tell developers to cut corners or do low-quality work. Or, to say this differently, the business has no right to force developers to ruin their professional reputations or violate their professional ethics.
- You have the right to ask for and receive help from peers, managers, and customers.
  - This statement gives programmers the right to communicate.
- You have the right to make and update your estimates.
  - You can change your estimate when new factors come to light. Estimates are guesses that get better with time. Estimates are never commitments.
- You have the right to accept your responsibilities instead of having them assigned to you.
  - Professionals accept work, they are not assigned work. A professional developer has every right to say "no" to a particular job or task. It may be that the developer does not feel confident in their ability to complete the task, or it may be that the developer believes the task is better suited for someone else. Or, it may be that the developer rejects the task for personal or moral reasons. Acceptance implies responsibility.

> Agile is a set of rights, expectations, and disciplines of the kind that form the basis of an ethical profession.

## Chapter 3: Business Practices

If you would like an accurate and precise estimate of a project, then break it down into individual lines of code. The time it takes you to do this will give you a very accurate and precise measure of how long it took you to build the project.

Trivariate Analysis - such estimates are composed of three numbers: best-case, nominal-case, and worst-case. These numbers are confidence numbers. The worst-case number is the amount of time within which you feel 95% confident that the task will be completed. The nominal-case has only 50% confidence, and the best-case only 5%.
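The book stops at the three confidence numbers. One common way to collapse a trivariate estimate into a single planning figure is the PERT beta weighting - an addition here, not something the book prescribes:

```python
def pert_estimate(best: float, nominal: float, worst: float) -> tuple[float, float]:
    """Collapse a trivariate estimate into an expected duration and a rough
    standard deviation, using the classic PERT weighting (an assumption here,
    not the book's method)."""
    expected = (best + 4 * nominal + worst) / 6
    std_dev = (worst - best) / 6
    return expected, std_dev


# Illustrative task: best case 2 days, nominal 4 days, worst case 12 days.
expected, sigma = pert_estimate(2, 4, 12)
print(f"{expected:.1f} days +/- {sigma:.1f}")  # 5.0 days +/- 1.7
```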
Stories and Points - a user story is an abbreviated description of a feature of the system, told from the point of view of a user. We want to delay the specification of those details as long as possible, right up to the point where the story is developed. Story points are a unit of estimated effort, not real time. They are not even estimated time - they are estimated effort.

Velocity is not a commitment. The team is not making a promise to get 30 points done during the iteration. They aren't even making the promise to try to get 30 points done. This is nothing more than their best guess as to how many points will be complete by the end of the iteration.

The Four-Quadrant Game (The Highest Return on Investment) - the stories that are valuable but cheap will be done right away. Those that are valuable but expensive will be done later. Those that are neither valuable nor expensive might get done one day. Those that are not valuable but are expensive will never be done.

Yesterday's weather - the best predictor of today's weather is yesterday's weather. The best predictor of the progress of an iteration is the previous iteration.

The project is over when there are no more stories in the deck worth implementing.

User stories are simple statements that we use as reminders of features. We try not to record too much detail when we write the story because we know that those details will likely change. Stories follow a simple set of guidelines that we remember with the acronym INVEST:

- I - Independent - they do not need to be implemented in any particular order. This is a soft requirement because there may be stories that depend on other stories. Still, we try to separate the stories so that there is little dependence.
- N - Negotiable - we want details to be negotiable between the developers and the business.
- V - Valuable - the story must have clear and quantifiable value to the business. Refactoring/Architecture/Code cleanup is never a story. A story is always something that the business values.
- E - Estimable - must be concrete enough to allow the developers to estimate it.
- S - Small - a user story should not be larger than what one or two developers can implement in a single iteration.
- T - Testable - the business should be able to articulate tests that will prove that the story has been completed.

There are a number of schemes for estimating stories:

- Flying Fingers
- Planning Poker

A spike is a meta-story, or a story for estimating a story. It is called a spike because it often requires us to develop a long but very thin slice through all the layers of the system. For example, there is a story you cannot estimate: Print PDF - you have never used the PDF library. So you write a new story called Estimate Print PDF - now you estimate that story, which is easier to estimate.

The goal of each iteration is to produce data by getting stories done. The team should focus on stories rather than tasks within stories. It is far better to get 80% of the stories done than it is to get each story 80% done. Focus on driving the stories to completion.

A story cannot be completed without the acceptance tests. If QA continues to miss the midpoint deadline, one iteration after another, then the ratio of QA engineers to developers is likely wrong. After the midpoint, if all the acceptance tests are done, QA should be working on the tests for the next iteration. The definition of done is this: acceptance tests pass.

If we see a positive slope in velocity, it likely does not mean that the team is actually going faster. Rather, it probably means that the project manager is putting pressure on the team to go faster. As that pressure builds, the team will unconsciously shift the value of their estimates to make it appear that they are going faster. This is simple inflation. The points are a currency, and the team is devaluing them under external pressure. The lesson is that velocity is a measurement, not an objective. Don't put pressure on the thing you are measuring. An estimate is not a promise, and the team has not failed if the actual velocity is lower.

The practice of Small Releases suggests that a development team should release their software as often as possible. The new goal is Continuous Delivery - the practice of releasing the code to production after every change.

Acceptance Tests - requirements should be specified by the business.

BDD - Behavior-Driven Development - the goal is to remove the techie jargon from the tests and make the tests appear more like specifications that businesspeople would appreciate. At first, this was just another attempt at formalizing the language of testing, in this case using 3 special adverbs: Given, When, and Then.
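A sketch of how the three adverbs read in a plain pytest test - the `Account` class and the scenario are invented for illustration, not taken from the book:

```python
class Account:
    """Hypothetical domain object, defined inline to keep the example self-contained."""

    def __init__(self, balance: int) -> None:
        self.balance = balance

    def withdraw(self, amount: int) -> None:
        if amount > self.balance:
            raise ValueError("insufficient funds")
        self.balance -= amount


def test_withdrawal_reduces_balance():
    # Given an account with a balance of 100
    account = Account(balance=100)
    # When the customer withdraws 40
    account.withdraw(40)
    # Then the remaining balance is 60
    assert account.balance == 60
```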
## Chapter 4: Team Practices

A metaphor can provide a vocabulary that allows the team to communicate effectively. On the other hand, some metaphors are silly to the point of being offensive to the customer. DDD solved the metaphor problem. Eric Evans coined the term _Ubiquitous Language_. What the team needs is a model of the problem domain, which is described by a vocabulary that everyone (the programmers, QA, managers, customers, users) agrees on. The Ubiquitous Language is used in all parts of the project. It is a thread of consistency that interconnects the entire project during every phase of its lifecycle.

A software project is not a marathon, not a sprint, nor a sequence of sprints. In order to win, you must pace yourself. If you leap out of the blocks and run full speed, you will run out of energy long before you cross the finish line. You must run at a Sustainable Pace. If you try to run faster than the pace you can sustain, you will have to slow down and rest before you reach the finish line. Managers may ask you to run faster than you should. You must not comply. It is your job to husband your resources to ensure that you endure to the end.

> Working overtime is not a way to show your dedication to your employer. What it shows is that you are a bad planner,
> that you agree to deadlines to which you shouldn't agree, that you make promises you shouldn't make, that you are a
> manipulable laborer and not a professional. This is not to say that all overtime is bad, nor that you should never
> work overtime. There are extenuating circumstances for which the only option is to work overtime. But they should be
> extremely rare. And you must be aware that the cost of that overtime will likely be greater than the time you save on
> the schedule.

The most precious ingredient in the life of a programmer is sufficient sleep. Make sure you know how many hours of sleep your body needs, and then prioritize those hours. Those hours will more than pay for themselves.

No one owns the code in an Agile project. The code is owned by the team as a whole. Any member of the team can check out and improve any module in the project at any time. The team owns the code collectively. Collective Ownership does not mean that you cannot specialize. However, even as you specialize, you must also generalize. Divide your work between your specialty and other areas of the code. Maintain your ability to work outside your specialty.

The continuous build should never break.

Standup Meeting:

- This meeting is optional. Many teams get by just fine without one.
- It can be held less often than daily. Pick the schedule that makes sense to you.
- It should take ~10 minutes, even for large teams.
- This meeting follows a simple formula. The basic idea is that the team members stand in a circle and answer 3 questions:
  1. What did I do since the last meeting?
  2. What will I do until the next meeting?
  3. What is in my way?
  4. [Optional] Whom do you want to thank?

No discussion. No posturing. No deep explanations. No complaints. Everybody gets 30 seconds to answer those 3 questions.

## Chapter 5: Technical Practices

Without TDD, Refactoring, Simple Design and Pair Programming, Agile becomes an ineffective, flaccid shell of what it was intended to be.

TEST-DRIVEN DEVELOPMENT. Every required behavior should be entered twice: once as a test, and then again as production code that makes the test pass. The 3 rules of TDD:

1. Do not write any production code until you have first written a test that fails due to the lack of that code.
2. Do not write more of a test than is sufficient to fail - and failing to compile counts as a failure.
3. Do not write more production code than is sufficient to pass the currently failing test.

The tests are a form of documentation that describes the system being tested. This documentation is written in a language that the programmers know fluently. It is utterly unambiguous, it is so formal it executes, and it cannot get out of sync with the application code. The tests are the perfect kind of documentation for programmers: code.

Remember that function that is hard to test after the fact? The function is hard to test because you did not design it to be easy to test. You wrote the code first, and you are now writing the tests as an afterthought. By writing the tests first, you will decouple the system in ways that you had never thought about before. The whole system will be testable; therefore, the whole system will be decoupled.
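A minimal sketch of one pass through the three rules with pytest (the test runner used elsewhere in this repo); the `Stack` example is hypothetical:

```python
# Red (rules 1 and 2): this test is written before any production code exists.
# With no Stack class yet, it cannot even run - in Python, a NameError plays
# the role of "failing to compile".
def test_new_stack_is_empty():
    assert Stack().is_empty()


# Green (rule 3): write only enough production code to pass the failing test.
# Anything more elaborate than this hard-coded answer would violate rule 3.
class Stack:
    def is_empty(self) -> bool:
        return True


# The next failing test (e.g. that pushing makes the stack non-empty) is what
# forces `return True` to grow into real logic.
```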
REFACTORING. Refactoring is the practice of improving the structure of the code without altering the behavior, as defined by the tests. In other words, we make changes to the names, the classes, the functions and the expressions without breaking any of the tests.

Red/Green/Refactor:

1. We create a test that fails.
2. Then we make the test pass.
3. Then we clean up the code.
4. Return to step 1.

The word Refactoring should never appear on a schedule. Refactoring is not the kind of activity that appears on a plan. We do not reserve time for refactoring. Refactoring is simply part of our minute-by-minute, hour-by-hour approach to writing software.

Sometimes the requirements change in such a way that you realize the current design and architecture of the system is suboptimal, and you need to make a significant change to the structure of the system. Such changes are made within the Red/Green/Refactor cycle. We do not create a project specifically to change the design. We do not reserve time in the schedule for such large refactorings. Instead, we migrate the code one small step at a time, while continuing to add new features during the normal Agile cycle.

SIMPLE DESIGN. The practice of Simple Design is one of the goals of Refactoring. Simple Design is the practice of writing only the code that is required, with a structure that keeps it simplest, smallest, and most expressive. Rules of Simple Design:

1. Pass all the tests.
2. Reveal the intent - it should be easy to read and self-descriptive. This is where we apply many of the simpler and more cosmetic refactorings. We also split large functions into smaller, better-named functions.
3. Remove duplication.
4. Decrease elements - once we have removed all the duplication, we should strive to decrease the number of structural elements, such as classes, functions, variables.

The more complex the design, the greater the cognitive load placed on the programmers. That cognitive load is Design Weight. The greater the weight of that design, the more time and effort are required for the programmers to understand and manipulate the system.
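A small before/after sketch of rules 2 and 3, with invented order data - equivalent logic, restructured to reveal intent and remove the duplicated subtotal expression:

```python
# Before: passes its tests, but reveals nothing about the intent.
def chk(o):
    return o["t"] * o["q"] - (o["t"] * o["q"]) * (0.1 if o["q"] > 100 else 0)


# After: the same calculation (keys renamed for readability) with named
# constants, a named subtotal, and the duplication gone.
BULK_THRESHOLD = 100
BULK_DISCOUNT = 0.1


def order_total(order: dict) -> float:
    subtotal = order["unit_price"] * order["quantity"]
    discount = BULK_DISCOUNT if order["quantity"] > BULK_THRESHOLD else 0
    return subtotal * (1 - discount)
```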
PAIR PROGRAMMING. Pairing is the act of two people working together on a single programming problem. Any configuration is fine (the same workspace, sharing the screen, keyboard, ping-pong, ...). We pair so that we behave like a team. When a member of a team goes down, the other team members cover the hole left by that member and keep making progress towards the goal.

**Pairing is the best way, by far, to share knowledge between team members and prevent knowledge silos from forming. It is the best way to make sure that nobody on the team is indispensable.**

The word "pair" implies that there are just 2 programmers involved in a pairing session. While this is typically true, it is not a rule. Generally, managers are pleased to see programmers collaborating and working together. It creates the impression that work is being done.

**Never, ever, ever, ask for permission to pair. Or test. Or refactor. Or... You are the expert. You decide.**

## Chapter 6: Becoming Agile

Agile Values:

1. Courage - it is reckless to conform to a schedule by sacrificing quality. The belief that quality and discipline increase speed is a courageous belief, because it will constantly be challenged by powerful but naive folks who are in a hurry.
2. Communication - a team that sits together and communicates frequently can work miracles. We value direct and frequent communication that crosses channels. Face-to-face, informal, interpersonal conversations.
3. Feedback - maximize the frequency and quantity of feedback. Feedback loops allow us to determine when things are going wrong early enough to correct them. They provide massive education about the consequences of earlier decisions.
4. Simplicity - the number of problems should be reduced to a minimum, and indirection kept to a minimum. Solutions can be simple. This applies to the software, but it also applies to the team. Passive aggression is indirection. Keep the code simple. Keep the team simpler.

These values are diametrically opposed to the values of large organisations that have invested heavily in middle-management structures that value safety, consistency, command-and-control, and plan execution. It is not really possible to transform such an organisation to Agile.

Agile coaches are members of the team whose role is to defend the process within the team. In the heat of development, developers may be tempted to go off process. Perhaps they inadvertently stop pairing, stop refactoring, or ignore failures in the continuous build. The coach acts as the team's conscience, always reminding the team of the promises they made to themselves and the values they agreed to hold. This role typically rotates from one team member to the next on an informal schedule and based on need. A mature team working steadily along does not require a coach. On the other hand, a team under some kind of stress (schedule, business or interpersonal) may decide to ask someone to fill the role temporarily.

Every member of an Agile team needs to understand the values and techniques of Agile. Therefore, if one member of the team is trained, all members of the team should be trained.

Agile is for small- to medium-sized teams. Period. It works well for such teams. Agile was never intended for large teams. The problem of large teams is a problem of societies and civilizations. And large teams are a solved problem. Agile was invented because we did not know how to effectively organize a relatively small group of programmers to be effective. Software development needed its own process because software is really like nothing else. The answer to the question of Agile in the large is simply to organize your developers into small Agile teams, then use standard management and operations research techniques to manage those teams.

Great tools do the following:

- Help people accomplish their objectives
- Can be learned "well enough" quickly
- Become transparent to users
- Allow adaptation and exaptation
- Are affordable

Git is an example of a great tool. Your team should establish the pattern of work compatible with their specific context first, and then consider using tools that support their workflow. Workers use and control tools; tools don't control and use people. You don't want to get locked into other people's process flows.

ALM (Agile Lifecycle Management) systems - despite being feature-rich and commercially successful, ALM tools utterly fail at being great:

- ALMs tend to be complicated, usually demanding up-front training.
- These tools often require constant attention.
- ALM tools aren't always easily adapted.
- ALM tools can be expensive.
- ALM tools rarely work the way your team does, and often their default mode is at odds with Agile methods.
For example, many ALM tools assume that team members have individual work assignments, which makes them nearly unusable for teams who work together in a cross-functional way.

You can try different forms of Agile practices and check which one is the most relevant to your team's needs:

- Kanban - making the work visible, limiting work in progress and pulling work through the system.
- Scrum and XP - short daily meetings, a product owner, a process facilitator (Scrum Master), retrospectives, a cross-functional team, user stories, small releases, refactoring, writing tests first, and pair programming.
- Align team events - when team events across multiple teams (standups, retrospectives) are aligned in time, it is possible to roll up daily and systemic impediments via an escalation tree.
- Escalation trees - if it makes sense to always work on items that produce the highest value, then it makes sense to escalate impediments immediately via a well-defined escalation path.
- Regular interteam interaction - regular interaction between the Scrum Masters, Product Owners and team members who are working together toward a common deliverable.
- Portfolio Kanban - sets work-in-progress limits at the initiative level in order to ensure that the organization is focused on the highest-value work at all times.
- Minimum Viable Increments - what is the shortest path to producing the highest value in the shortest time. A growing number of organizations are taking this to the extreme by implementing Continuous Delivery - releasing small updates on a frequent basis, sometimes as frequently as multiple times per day.

Enablers of multiteam coordination:

- SOLID - especially useful for simplifying multiteam coordination by dramatically reducing dependencies.
- Small, valuable user stories - limit the scope of dependencies, which simplifies multiteam coordination.
- Small, frequent releases - whether these releases are delivered to the customer or not, the practice of having a releasable product across all the teams involved helps to surface coordination and architectural issues so that the root cause can be found and addressed.
- Continuous Integration - calling for integration across the entire product after every check-in.
- Simple Design - one of the hardest practices to learn and apply, because it is one of the most counter-intuitive practices.

When coordinating the work of many teams, monolithic, centralized, preplanned architectures create massive dependencies between the teams that tend to force them to work in lock step, thus defeating much of the promise of Agile. Simple Design, especially when used with practices such as a microservices architecture, enables Agility in the large.

## Chapter 7: Craftsmanship

Many companies misunderstood Agile. Managers are willing to push developers to work faster and are using the full transparency of the process to micromanage them. Developers are pushed hard to fit their estimates into the imposed milestones. Failing to deliver all story points in a sprint means the developers must work harder in the next sprint to make up the delay. If the product owner thinks developers are spending too much time on things like automated tests, refactoring, or pairing, they simply tell them to stop doing it. Strategic technical work has no place in _their_ Agile process. There is no need for architecture or design. The order is to simply focus on the highest-priority item in the backlog and get it done as fast as possible.
This approach results in a long sequence of iterative tactical work and accumulation of technical debt. Bugs accumulate, delivery time goes up, people start to blame one another.

> Companies are still not mature enough to understand that technical problems are in fact business problems.

A group of developers met in November 2008 in Chicago to create a new movement: Software Craftsmanship. Manifesto: As aspiring Software Craftsmen, we are raising the bar of professional software development by practicing it and helping others learn the craft. Through this work we have come to value:

- Not only working software, but also well-crafted software.
- Not only responding to change, but also steadily adding value.
- Not only individuals and interactions, but also a community of professionals.
- Not only customer collaboration, but also productive partnerships.

The Software Craftsmanship manifesto describes an ideology, a mindset. It promotes professionalism through different perspectives.

**Well-crafted software** - code that is well-designed and well-tested. It is code that we are not scared to change and code that enables the business to react fast. It is code that is both flexible and robust.

**Steadily adding value** - no matter what we do, we should always be committed to continuously providing increasing value to our clients and customers.

**A community of professionals** - we are expected to share and learn with each other, raising the bar of our industry. We are responsible for preparing the next generation of developers.

**Productive partnership** - we will have a professional relationship with our clients and employers. We will always behave ethically and respectfully, advising and working with our clients and employers in the best way possible. We will expect a relationship of mutual respect and professionalism. We will look at our work not as something we need to do as part of a job but as a professional service we provide. We will take ownership of our own careers, investing our own time and money to get better at what we do. Craftspeople strive to do the best job they can, not because someone is paying, but based on a desire to do things well.

Developers should not ask for authorization to write tests. They should not have separate tasks for unit testing or refactoring. These technical activities should be factored into the development of any feature. They are not optional. Managers and developers should only discuss what is going to be delivered and when, not how. Every time developers volunteer details of how they work, they are inviting managers to micromanage them. Developers should be able to clearly describe how they work and the advantages of working that way to whomever is interested. What developers should not do is let other people decide how they work. Conversations between developers and business should be about why, what and when - not how.

Craftsmanship promotes software development as a profession. A profession is part of who we are. A job is a thing that we do but is not part of who we are. A profession is something we invest in. It is something we want to get better at. We want to gain more skills and have a long-lasting and fulfilling career. Combining Agile and Craftsmanship is the perfect way to achieve business agility.

## Chapter 8: Conclusion

This book covered the basics of Agile.
## Afterword

Ask the developers in an "Agile organization" what Agile is, and you will likely get a very different answer than if you ask anyone beyond the level of a software development manager. Developers understand Agile to be a methodology for streamlining the development process and for making software development more predictable, more practicable, and more manageable. Many developers are blissfully unaware of how management uses the metrics and data produced by the implementation of Agile practices.

================================================
FILE: books/clean-code.md
================================================

[go back](https://github.com/pkardas/learning)

# Clean Code: A Handbook of Agile Software Craftsmanship

Book by Robert Cecil Martin

- [Chapter 1: Clean Code](#chapter-1-clean-code)
- [Chapter 2: Meaningful names](#chapter-2-meaningful-names)
- [Chapter 3: Functions](#chapter-3-functions)
- [Chapter 4: Comments](#chapter-4-comments)
- [Chapter 5: Formatting](#chapter-5-formatting)
- [Chapter 6: Objects and Data Structures](#chapter-6-objects-and-data-structures)
- [Chapter 7: Error Handling](#chapter-7-error-handling)
- [Chapter 8: Boundaries](#chapter-8-boundaries)
- [Chapter 9: Unit Tests](#chapter-9-unit-tests)
- [Chapter 10: Classes](#chapter-10-classes)
- [Chapter 11: Systems](#chapter-11-systems)
- [Chapter 12: Emergence](#chapter-12-emergence)
- [Chapter 13: Concurrency](#chapter-13-concurrency)
- [Chapter 17: Smells and Heuristics](#chapter-17-smells-and-heuristics)

## Chapter 1: Clean Code

- ugly code is expensive - take your time to write good code
- bad code is the programmer's fault, not the PO's, the manager's or anyone else's
- bad code is like a building with broken windows - people see an ugly building and stop caring
- code is like prose, code should look like you care
- make the language look like it was made for the problem
- code rots quickly

## Chapter 2: Meaningful names

A variable name should answer all the questions. It should tell why it exists. If a name requires a comment, it does not reveal its intent. Names should be pronounceable. One-letter variables are hard to `grep` in the code - they should be used ONLY as local variables inside short methods. The length of a name should correspond to the size of its scope. Avoid encodings.

> Difference between a smart programmer and a professional programmer is that the professional programmer understands that **clarity is king**.

Don't be funny 😔 People tend to forget jokes, so people will forget the true meaning of a variable. Choose clarity over entertainment. Do not use slang or culture-dependent names. Pick one word per concept, e.g. `get` instead of `fetch`, `retrieve`, ...

## Chapter 3: Functions

Functions are the first line of organisation in any program. Functions should be small. No more than 2-3 indents.

> Functions should do one thing. They should do it well. They should do it only.

The reason we write functions is to decompose a larger concept. A function should not mix levels of abstraction.

> You know you are working on clean code when each routine turns out to be pretty much what you expected.

Don't be afraid to make a name long. The more function arguments, the worse - difficulties with testing. Passing a boolean flag to a function is extremely ugly. Grouping arguments into objects seems like cheating, but it is not. Functions should have no side effects.

*Command Query Separation* - functions should either do something or answer something, but not both. Exceptions are preferred over error codes.
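A minimal Python sketch of both ideas - the names (`Mailbox`, `pop_message`, `EmptyMailboxError`) are invented for illustration, not from the book. A query answers something, a command does something, and a failure raises an exception instead of returning an error code:

```python
class EmptyMailboxError(Exception):
    """Raised instead of returning an error code such as -1 or None."""


class Mailbox:
    def __init__(self, messages: list[str]):
        self._messages = messages

    # Query - answers something, changes nothing.
    def has_messages(self) -> bool:
        return bool(self._messages)

    # Command - does something; on failure it raises instead of
    # returning a sentinel the caller could forget to check.
    def pop_message(self) -> str:
        if not self._messages:
            raise EmptyMailboxError
        return self._messages.pop(0)


mailbox = Mailbox(["hello"])
if mailbox.has_messages():        # ask first (query)...
    print(mailbox.pop_message())  # ...then act (command)
```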
Suggestion: extract exception handling to a separate function.

*Don't repeat yourself* - duplication may be the root of all evil in software. Database normal forms were formed to eliminate duplication in data, OOP concentrates the code, etc.

> Writing software is like any other kind of writing. When you write a paper or article, you get your thoughts down first, then you massage it until it **reads well**.

> The art of programming is, and always has been, the art of language design.

## Chapter 4: Comments

Comments are usually bad, they mean you failed to express yourself in code. IMO: the best comments are the ones that explain why things were done in a particular way. Don't put historical discussions or irrelevant details into the comments.

## Chapter 5: Formatting

Code formatting is important. Visual design of the code is important. Variables should be declared in places that are "well-known for everybody". Functions should show natural flow -> top-down. Another matter is alignment, e.g. of test cases in parametrised tests - useful there, however aligning variable declarations is overkill. In any case, a team should agree upon a single formatting style.

## Chapter 6: Objects and Data Structures

Hiding implementation is about abstractions. The Law of Demeter - a module should not know about the innards of the objects it manipulates. If class *C* has a method *f*, method *f* should call only the methods of: *C*, objects created by *f*, objects passed as arguments to *f*, or objects held in an instance variable of *C*.

Train wreck: `ctxt.getOptions().getScratchDir().getAbsolutePath()` - a bunch of coupled train cars. Does it violate the Law of Demeter? `ctxt` contains options, which contain a scratch directory, which has an absolute path - a lot of knowledge. However, in this case the law does not apply, because these are data structures with no behaviour. It would still be good to hide the structure of `ctxt`, e.g.: `ctxt.getScratchDirectoryOption().getAbsolutePath()`.

Data Transfer Objects - a class with public variables and no functions, e.g. for communicating with the database. Objects expose behaviour and hide data; data structures expose data and have no significant behaviour.

## Chapter 7: Error Handling

Error handling is important, but if it obscures logic, it is wrong. Exceptions are preferred over return codes - return codes clutter the caller with unnecessary code. `try` blocks are like transactions; `catch` has to leave the program in a consistent state. Error messages need to be informative - mention the operation that failed and the type of failure. It might be a good idea to wrap a library's errors with your own exceptions - this makes the library easily replaceable.

## Chapter 8: Boundaries

How to keep the boundaries of our system clean - e.g. when using external libraries:

- when working with collections, wrap them with an object and provide only the required functionality
- write learning tests - tests written to explore and understand an API
- our code shouldn't know too many details about a 3rd-party library - use an ADAPTER that converts from our perfect interface to the provided interface

## Chapter 9: Unit Tests

The Three Laws of TDD:

- You may not write production code until you have written a failing unit test
- You may not write more of a unit test than is sufficient to fail, and not compiling is failing
- You may not write more production code than is sufficient to pass the currently failing test

Test code is just as important as production code. It is not a second-class citizen. It must be kept as clean as production code.
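A tiny pytest-flavoured illustration of the three laws above - `add` is a made-up function, not an example from the book. First write just enough test to fail (before `add` exists, the test fails; failing to compile/import counts as failing), then just enough production code to pass:

```python
# Law 1 and 2: the smallest failing test comes first.
def test_add():
    assert add(2, 3) == 5


# Law 3: only enough production code to make the failing test pass.
def add(a: int, b: int) -> int:
    return a + b
```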
The Build-Operate-Check pattern - each test is split into three parts:

1. build up the test data
2. operate on the test data
3. check that the operation yielded the expected results

Test code must be simple, succinct and expressive; however, it doesn't need to be as efficient as production code. One test should test a single concept.

Clean tests follow 5 rules - FIRST:

- F - Fast - tests should run quickly; if they don't, you won't want to run them frequently.
- I - Independent - tests should not depend on each other; one test should not set up conditions for the next test.
- R - Repeatable - tests should be repeatable in any environment (office, home, train without network); if they are not, you will have an excuse for why they fail.
- S - Self-Validating - tests should have a boolean output: they either pass or fail.
- T - Timely - tests need to be written in a timely fashion, just before the production code.

## Chapter 10: Classes

The first rule is that classes should be small. The second rule is that they should be smaller than that. Naming is probably the best way of determining class size. If we cannot derive a concise name for a class, it is likely too large.

The Single Responsibility Principle - a class or module should have one, and only one, reason to change.

Cohesion - classes should have a small number of instance variables, and each of a class's methods should manipulate one or more of those variables.

Open-Closed Principle - a class should be open for extension but closed for modification.

Dependency Inversion Principle - our classes should depend upon abstractions, not on concrete details.

## Chapter 11: Systems

It is a myth that we can get systems "right the first time". Instead, we should implement only today's stories, then refactor and expand the system to implement new stories tomorrow. This is the essence of iterative and incremental agility. Use the simplest thing that can possibly work.

## Chapter 12: Emergence

According to Kent, a design is simple if it follows these rules:

- runs all tests - the system needs to be testable; if this cannot be achieved, the system should not be released - all tests need to pass
- contains no duplication
- expresses the intent of the programmer - the clearer the code, the less time others will have to spend understanding it (small functions and classes, good names)
- minimises the number of classes and methods - the least important rule; the rules above matter more, however the overall goal should be to keep the system small

Can a set of practices replace experience? No. On the other hand, practices are a crystallised form of many decades of experience of many authors.

## Chapter 13: Concurrency

Concurrency is a decoupling strategy. It helps to decouple what gets done from when it gets done. In single-threaded apps, what and when are strongly coupled.

Concurrency Defence Principles:

- Single Responsibility Principle - concurrency-related code should be kept separate from other code
- limit the access to any data that may be shared
- a good way of avoiding shared data is to avoid sharing data in the first place - use copies of data, collect results from multiple threads and merge them
- threads should be as independent as possible
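A minimal sketch of the "use copies of data" principle above - the worker/chunk names are invented for illustration. Each thread operates on its own copy of a slice; only the final merge touches shared state:

```python
import threading

numbers = list(range(100))
results = []
results_lock = threading.Lock()


def worker(chunk: list[int]) -> None:
    # Each thread works only on its own copy - no shared mutable input.
    total = sum(chunk)
    with results_lock:  # only the merge step needs synchronisation
        results.append(total)


threads = [
    threading.Thread(target=worker, args=(numbers[i:i + 25].copy(),))
    for i in range(0, 100, 25)
]
for t in threads:
    t.start()
for t in threads:
    t.join()

print(sum(results))  # 4950 - same as summing the list directly
```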
Java supports thread-safe collections, e.g. ConcurrentHashMap. There are other classes to support advanced concurrency: ReentrantLock - a lock that can be acquired and released; Semaphore - a classic lock with a count; CountDownLatch - a lock that waits for a number of events before releasing all threads waiting on it.

Basic definitions:

- Bound Resources - resources of a fixed size or number used in a concurrent environment, e.g. database connections
- Mutual Exclusion - only one thread can access shared data or a shared resource at a time
- Starvation - thread(s) prohibited from proceeding for an excessively long time or forever
- Deadlock - two or more threads waiting for each other to finish
- Livelock - threads in lockstep, each trying to do work but finding another "in the way"; threads continue trying to make progress but are unable to

Execution models:

- producer-consumer - one or more producer threads create some work and place it in a queue; one or more consumer threads acquire that work from the queue and complete it
- readers-writers - a writer waits until there are no readers before performing an update; if there is a continuous stream of readers, writers will starve
- dining philosophers - a hungry philosopher needs 2 forks before accessing the food; after eating he releases the forks and waits until he is hungry again. There are a number of solutions to this problem.

The `synchronized` keyword introduces a lock in Java. Locks are expensive, so use them carefully, and keep synchronised sections small. Graceful shutdown is hard to get correct. Think about it early and get it working early.

General tips:

- get your non-threaded code working first
- make thread-based code pluggable (one thread, n threads, ...)
- run with more threads than processors

## Chapter 17: Smells and Heuristics

Comments:

- Metadata should not appear in comments (author, modification date). Comments should be reserved for technical notes only.
- Do not write comments that will become obsolete.
- Do not paraphrase code.
- Be brief and correct.
- Instead of commenting out code - delete it.

Environment:

- You should be able to check out the system with one simple command.
- You should be able to run all unit tests with just one command.

Functions:

- Functions should have a small number of arguments; no argument is best. More than 3 arguments is very questionable and should be avoided.
- Output arguments are counterintuitive - readers expect arguments to be inputs, not outputs. If a function must change the state of something, have it change the state of the object it is called on.
- Flag arguments should be avoided (boolean flags) - they loudly declare that the function is doing multiple things.
- Methods that are never called should be removed. Dead code is wasteful.
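A quick sketch of the flag-argument smell from the list above - the `render` functions are invented for illustration. The boolean flag declares that one function does two things, so it is split in two:

```python
# Smell: the boolean flag loudly declares that render() does two things.
def render(page: str, as_pdf: bool) -> str:
    if as_pdf:
        return f"<pdf>{page}</pdf>"
    return f"<html>{page}</html>"


# Better: one function per behaviour, and the call site reads plainly.
def render_html(page: str) -> str:
    return f"<html>{page}</html>"


def render_pdf(page: str) -> str:
    return f"<pdf>{page}</pdf>"
```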
General:

- The ideal source file should contain one, and only one, language (not a mix of, for example, Java + JavaScript snippets + English comments).
- A function / class should implement the behaviours that another programmer could reasonably expect.
- Check every boundary condition.
- No duplication - perhaps the most important rule. Duplicated code means a missed opportunity for abstraction. Codd's normal forms are a strategy for eliminating duplication in data.
- It is important to create abstractions that separate higher-level general concepts from lower-level detailed concepts.
- High-level concepts should be independent of low-level details.
- A well-defined interface does not offer very many functions to depend upon, so coupling is low. Good software engineers learn to limit what they expose at the interfaces of their classes and modules.
- Get rid of dead code - code that is never executed.
- Variables and functions should be defined close to where they are used.
- Use consistent naming.
- Keep source code organised and free of clutter.
- Things that don't depend upon each other should not be artificially coupled.
- Feature envy - the methods of a class should be interested in the variables and functions of the class they belong to, not the variables and functions of other classes.
- Code should be as expressive as possible.
- Code should be placed where a reader would naturally expect it to be (the principle of least surprise).
- Consider whether a function should be static or not.
- Variables should have meaningful names; use intermediate variables when performing difficult calculations.
- Function names should say what they do - if you can't understand what a function does by reading the call, change the name.
- Polymorphism is preferred over if / else or switch / case statements.
- Follow code standards.
- Replace magic numbers with named constants.
- Be precise, use appropriate data structures.
- Encapsulate conditionals - boolean logic is hard to understand without seeing it in context; extract functions that explain the intent of the conditional.
- Avoid negative conditionals - they are harder to understand.
- Functions should do one thing.
- Encapsulate boundary conditions.
- The statements within a function should all be written at the same level of abstraction.
- Keep configurable data at high levels.
- Law of Demeter - we don't want a single module to know much about its collaborators.

Names:

- Choose descriptive names. Names in software are 90% of what makes software readable.
- Choose names at the appropriate level of abstraction. Don't pick names that communicate implementation details.
- Use standard nomenclature where possible.
- Use unambiguous names.
- Names should describe side effects.

Tests:

- Use a coverage tool.
- Don't skip trivial tests.
- Test boundary conditions.
- Tests should be fast.

================================================
FILE: books/coaching-agile-teams.md
================================================

[go back](https://github.com/pkardas/learning)

# Coaching Agile Teams

Book by Lyssa Adkins

- [1. Will I be a Good Coach?](#1-will-i-be-a-good-coach)

## 1. Will I be a Good Coach?

If teams are to have these kinds of stellar experiences, they need to leverage agile to the full competitive advantage it was meant to provide. Agile coaching matters because it helps with both producing products that matter in a real, complex and uncertain world, and adding meaning to people's work lives. Agile is easy to get going yet hard to do well.

Imagine a team that admits mistakes, reinforces their shared values, forgives one another, and moves on. Do you think such a team would come up with astonishing ideas?
An agile (or Scrum) coach is:

- someone who appreciates the depths of agile practices and principles and can help teams appreciate them too
- someone who has faced big dragons - organizational impediments - and has become a coach to managers and other outsiders in the course of addressing them
- someone who can help management at all levels of the organization see the benefits of working agile
- someone who has brought in ideas from professional facilitation, coaching, conflict management, mediation, theater and more to help the team become a high-performance team

Native wiring for coaching:

- ability to "read a room" - to read the emotion in the air and know whether all is good
- care about people more than products
- cultivate curiosity
- believe that people are basically good
- they know that plans fall apart, so they act in the moment with the team
- any group of people can do good things
- it drives them crazy when someone says "yeah, I know, it's a waste of time, but that's how we do it here"
- chaos and destruction are simply building blocks for something better
- they risk being wrong

================================================
FILE: books/code-complete.md
================================================

[go back](https://github.com/pkardas/learning)

# Code Complete: A Practical Handbook of Software Construction

Book by Steve McConnell

- [Chapter 1: Software Construction](#chapter-1-software-construction)
- [Chapter 2: Metaphors for a Richer Understanding of Software Development](#chapter-2-metaphors-for-a-richer-understanding-of-software-development)
- [Chapter 8: Defensive Programming](#chapter-8-defensive-programming)
- [Chapter 20: The Software-Quality Landscape](#chapter-20-the-software-quality-landscape)
- [Chapter 21: Collaborative Construction](#chapter-21-collaborative-construction)
- [Chapter 22: Developer Testing](#chapter-22-developer-testing)
- [Chapter 24: Refactoring](#chapter-24-refactoring)
- [Chapter 25: Code-Tuning Strategies](#chapter-25-code-tuning-strategies)
- [Chapter 32: Self-Documenting Code](#chapter-32-self-documenting-code)
- [Chapter 33: Personal Character](#chapter-33-personal-character)
- [Chapter 34: Themes in Software Craftsmanship](#chapter-34-themes-in-software-craftsmanship)

## Chapter 1: Software Construction

Construction - the process of building (planning, designing, checking the work). Construction is mostly coding and debugging but also involves designing, planning, unit testing, ... It is the centre of the software development process, and the only activity that is guaranteed to be done (planning might be imperfect, etc.).

## Chapter 2: Metaphors for a Richer Understanding of Software Development

Metaphors contribute to a greater understanding of software-development issues - the paper-writing metaphor, the farming metaphor, etc.

## Chapter 8: Defensive Programming

Protecting yourself from the "cruel world of incorrect data". Use assertions to document assumptions made in the code. Guidelines:

- use assertions for conditions that should never occur; this is not error-checking code. On an error the program should take corrective action; on an assertion failure the source code should be updated.
- no executable code in asserts:
  - bad: `assert foo(), ...`
  - good: `result = foo(); assert result, ...`
- use asserts to document and verify preconditions (before executing the routine) and postconditions (after executing the routine)
- for high robustness: failed assertions should still be handled anyway
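A small sketch of the precondition/postcondition guidelines in Python - `normalise_weights` is a made-up routine for illustration. Assumptions are documented at entry and exit, and the asserted values are computed first so no real work happens inside the `assert` itself:

```python
def normalise_weights(weights: list[float]) -> list[float]:
    # Preconditions - document assumptions; this is not error checking,
    # these conditions should never occur in a correct program.
    assert weights, "expected a non-empty list of weights"
    total = sum(weights)
    assert total > 0, "expected a positive total weight"

    normalised = [w / total for w in weights]

    # Postcondition - compute the result first, then assert on it,
    # so the assert contains no executable work of its own.
    result_total = sum(normalised)
    assert abs(result_total - 1.0) < 1e-9
    return normalised


print(normalise_weights([1.0, 3.0]))  # [0.25, 0.75]
```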
Error handling:

- return a neutral value - 0, empty string, ...
- substitute the next piece of valid data - for example, when processing a stream of data from a sensor (e.g. temperature), you may want to skip the missing value and wait for the next one
- return the same answer as the previous time - some data does not change dramatically in time, so it is okay to return the last correct value
- substitute the closest legal value - for example, a reversing car does not show a negative speed value but instead shows 0 (the closest legal value)
- log a warning message on incorrect data
- return an error code - report that an error has been encountered and trust that some other routine higher up will handle it
- call a centralised error-processing routine - the disadvantage is that the entire program is coupled to the mechanism
- display an error message to the user - warning: don't share too much with the user, an attacker may use this information
- shut down - useful in safety-critical applications

While handling errors you need to choose between robustness (do something to keep the software alive) and correctness (ensuring the data is always correct). Once an approach is selected, it should be coherent across the system.

Exceptions:

- they eliminate the possibility of errors going unnoticed
- throw only for truly exceptional situations - for situations that cannot be addressed otherwise
- if an exception can be handled locally - handle it locally
- avoid exceptions in constructors, because if an exception happens there, the destructor might not be called - resource leak!
- include all the information that led to the exception
- avoid empty catch blocks
- standardise the project's use of exceptions

Barricades:

- similar to having isolated compartments in the hull of a ship - damaged parts are isolated
- use validation classes that are responsible for cleaning the data - assume data is unsafe and you need to sanitise it

*Offensive programming* - exceptional cases should be handled in a way that makes them obvious during development and recoverable when production code is running. During development you want errors to be as visible as possible; in production they should be handled gracefully.

## Chapter 20: The Software-Quality Landscape

There are many quality metrics: correctness, usability, efficiency, reliability, integrity, adaptability, accuracy, robustness - these are metrics important to the user. For a programmer, more important metrics are: maintainability, flexibility, portability, reusability, readability, testability, understandability.

*Techniques for Improving Software Quality*: set software-quality objectives, perform quality-assurance activities, prototyping.

Defect-detection techniques: design reviews, code reviews, prototyping, unit tests, integration tests, regression tests, ... even all of them combined will not detect all the issues.

> Most studies have found that inspections are cheaper than testing. A study at the Software Engineering Laboratory found that code reading detected about 80% more faults per hour than testing.

Cost of detection is only one part. There is also the cost of fixing the issues. The longer a defect remains in the system, the more expensive it becomes to remove.

Recommended combination: formal inspections of all requirements, architecture and design -> modeling / prototyping -> code reading -> testing.

Remember: improving quality reduces development cost.
## Chapter 21: Collaborative Construction

> IBM found that each hour of inspection prevented about 100 hours of related work (testing and defect correction).

> Reviews cut the errors by over 80%.

> Reviews create a venue for more experienced and less experienced programmers to communicate about technical issues.

Collective ownership - code is owned by the group rather than by individuals, and can be accessed and modified by various members.

Guide on pair programming:

- it will not be effective if you argue about styling conventions
- don't let it turn into watching - the person without the keyboard should be an active participant
- sometimes it is better to discuss something on the whiteboard and then go programming solo
- rotate pairs
- match the other's pace - the fast learner needs to slow down
- don't force people who don't like each other to pair
- no pairing between newbies

Nice idea: for discussing a design, everyone should come with a prepared list of potential issues. It is good to assign perspectives - maintainer, coder, user, designer. The author should play a minor role in such a discussion and only present the overview. A reviewer can be anyone other than the author - tester, developer. Management should not be present at the meeting; however, they should be briefed with the results after the discussion. A design review cannot be used for performance appraisals. The group should be focused on identifying defects. The goal of this meeting is not to explore alternatives or debate who is right and who is wrong.

> NASA's Software Engineering Laboratory found that code reading detected about 3.3 defects per hour of effort. Testing detected 1.8 errors per hour.

## Chapter 22: Developer Testing

> You must hope to find errors in your code. Such hope might seem like an unnatural act, but you should hope that it's you who finds the errors and not someone else.

Why TDD:

- it is the same effort to write test cases before and after
- you detect defects earlier, and you can correct them more easily
- it forces you to think a little about the requirements and design before writing code
- it exposes requirements problems sooner

Developers tend to write *clean tests* rather than test for all the ways code breaks. A developer's testing isn't sufficient to provide adequate quality assurance.

General Principle of Software Quality: improving quality improves the development schedule and reduces development cost.

## Chapter 24: Refactoring

The Cardinal Rule of Software Evolution: evolution should improve the internal quality of the program.
Signs / smells that indicate refactoring is needed:

- code duplication
- you need to make parallel changes
- too long routine
- too long loop or too deeply nested
- poor class cohesion - a class takes ownership of many unrelated responsibilities
- too many parameters
- changes require parallel modifications to multiple classes
- related data not organised into classes
- overloaded primitive data type
- a class doesn't do much - sometimes the result of refactoring is that an old class doesn't have much to do
- tramp data - one routine just passes data through to another
- one class knows too much about the other
- poor names
- public data members - in general a bad idea
- a subclass uses only a small percentage of its parent's routines
- comments used to explain bad code - "don't comment bad code, rewrite it"
- setup code needed before a routine call
- code that "seems like it might be needed one day" - programmers are rather bad at guessing what functionality might be needed someday; *designing ahead* introduces unnecessary complexity

Data-Level Refactoring (a small before/after sketch follows below):

- replace a magic number with a named constant
- give a variable an informative name
- inline expressions
- replace an expression with a routine
- convert a primitive data type to a class
- encapsulate returned collections

Statement-Level Refactoring:

- decompose boolean expressions - use variables that help document the meaning of the expression
- move a boolean expression into a well-named function
- return as soon as you know the return value

Routine-Level Refactoring:

- inline simple routines
- convert a long routine into a class
- separate query operations from modification operations
- combine similar routines by parametrizing them; if a routine depends on the parameter passed in, consider splitting the routine
- pass a whole object instead of specific fields; however, if you are creating an object just to pass it to a routine, consider changing the routine to take only specific fields
- a routine should return the most specific object (mostly applicable to iterators, collections, ...)

Class Implementation Refactoring:

- extract specialised code into a subclass - if a class has code that is used by only a subset of its instances
- combine similar code into a superclass - if at least 2 classes have similar code

Class Interface Refactoring:

- eliminate classes not doing much
- hide a delegate - A calling B, A calling C, when really class A should call class B and class B should call class C; or remove the middleman - remove B and make A call C directly
- hide routines that are not intended to be used outside the class
- encapsulate unused routines - if you use only a small portion of a class's interface

Refactoring might cause a lot of harm if misused:

- refactorings should be small
- one refactoring at a time
- make a list of needed steps
- make a parking lot - in the middle of a refactoring you might think about another refactoring, and another, and so on; for changes that aren't required immediately, save a list of TODO changes
- check the IDE / compiler / other tools' errors
- refactored code should be retested, and the programmer should also add more test cases
- be careful about small refactorings, because they tend to introduce more bugs than big ones
- adjust your approach based on the risk of the refactoring - some changes are more dangerous than others

Refactoring refers to making changes in working code that do not affect the program's behaviour. Programmers who are tweaking broken code aren't refactoring - they are hacking.
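A minimal sketch combining a few of the refactorings above (replace a magic number with a named constant, decompose a boolean expression, return as soon as you know the return value) - the `can_withdraw` example is invented for illustration:

```python
# Before: magic numbers and an opaque boolean expression.
def can_withdraw_before(balance: float, amount: float, vip: bool) -> bool:
    if (balance - amount >= -500 and vip) or (balance - amount >= 0 and not vip):
        return True
    else:
        return False


# After: named constants and a well-named intermediate variable,
# returning as soon as the value is known.
VIP_OVERDRAFT_LIMIT = -500
STANDARD_LIMIT = 0


def can_withdraw(balance: float, amount: float, vip: bool) -> bool:
    remaining = balance - amount
    allowed_limit = VIP_OVERDRAFT_LIMIT if vip else STANDARD_LIMIT
    return remaining >= allowed_limit


# Behaviour is unchanged - the essence of a refactoring.
assert can_withdraw_before(100, 400, vip=True) == can_withdraw(100, 400, vip=True)
```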
There are many strategies for deciding where refactoring should start. For example, whenever you are adding a routine you could refactor its neighbours, or do the same when adding a class, or refactor error-prone modules, the most complex modules, etc.

## Chapter 25: Code-Tuning Strategies

Code tuning is one way of improving a program's performance. You can often find other ways to improve performance - faster and without harm to the code.

> More computing sins are committed in the name of efficiency (without necessarily achieving it) than for any other single reason - including blind stupidity ~ Wulf

Efficiency can be seen from many viewpoints:

- requirements - TRW required sub-second response time, which led to a highly complex design and cost ~100M $; analysis determined users would be satisfied with 4-second responses 90% of the time, and modifying the response-time requirement reduced cost by ~70M $. Before you invest time solving a performance problem, make sure you are solving a problem that needs to be solved.
- design - sometimes a program's design makes it difficult to build a high-performance system; other designs make it hard not to.
- class and routine design - on this level, algorithms and data structures matter.
- OS interactions - you might not be aware the compiler generated code using heavy OS calls.
- code compilation - good compilers turn good high-level-language code into optimised machine code.
- hardware - sometimes the cheapest and best way to improve a program's performance is to buy new hardware.
- code tuning - small-scale changes that affect a single class, a routine or just a few lines of code, and make it run more efficiently.

Some sources say you can multiply improvements on each of the six levels, achieving a performance improvement of a million-fold. Code tuning is not the most effective way to improve performance! Writing micro-efficient code does not prove you are cool. Efficient code isn't necessarily better.

The Pareto Principle: also known as the 80/20 rule - you can get 80% of the result with 20% of the effort. Working toward perfection might prevent completion. Complete it first, and then perfect it. The part that needs to be perfect is usually small.

False statement: "Reducing the lines of code in a high-level language improves the speed or size of the resulting machine code":

```
# This is slower:
for i = 1 to 10
    a[i] = i

# This is faster:
a[1] = 1
a[2] = 2
...
a[10] = 10
```

It is also impossible to identify performance bottlenecks before the program is working completely, hence "you should optimise as you go" is false as well. Also, premature optimisation is the root of all evil, because you are missing perspective. Compilers are really powerful; however, they are better at optimising straightforward code than at optimising tricky code. So, design the application properly, write clear code and the compiler will do the rest :)

Sources of inefficiency:

- I/O operations - if possible, store data in memory
- paging - an operation that causes the OS to swap pages of memory is much slower than an operation that works on only one page of memory
- system calls - calls to system routines are expensive (context switch, saving app state, recovering kernel state); avoid system calls, write your own routines using the small part of the functionality offered by a system routine, or work with the system vendor to improve performance
- interpreted languages - :(
- errors - errors in code can be another source of performance problems
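Code-tuning decisions should be driven by measurement rather than intuition; a minimal sketch with the standard `timeit` module - the two variants being compared are invented for illustration:

```python
import timeit

# Two candidate implementations - intuition says the second should win,
# but only a measurement can confirm it on your machine and runtime.
setup = "data = list(range(1000))"
concat = "s = ''.join(str(x) for x in data)"
concat_map = "s = ''.join(map(str, data))"

print(timeit.timeit(concat, setup=setup, number=1000))
print(timeit.timeit(concat_map, setup=setup, number=1000))
```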
Experience doesn't help with optimisation. A person's experience might have come from an old machine, language or compiler. You can never be sure about the effect of an optimisation until you measure the effect.

## Chapter 32: Self-Documenting Code

Unit development folder - an informal document that contains notes used by a developer during construction; its main purpose is to provide a trail of design decisions that aren't documented elsewhere.

Detailed-design document - a low-level design document; it describes class-level or routine-level design decisions.

Internal documentation (within the program) is the most detailed kind of documentation. The main contributor to code-level documentation isn't comments, but good programming style: good variable names, clear layout and minimisation of control-flow and data-structure complexity.

> **Good comments don't repeat the code or explain it. They clarify its intent. Comments should explain, at a higher level of abstraction than the code, what you are trying to do.**

Kinds of comments:

- repeat of the code - the comment gives no additional information
- explanation of the code - the code is so complicated it needs to be explained; make the code better instead of adding comments
- **summary of the code** - very useful when someone other than the code's original author tries to modify the code
- **description of the code's intent** - an IBM study says "understanding the programmer's intent is the most difficult problem"
- **information that cannot be expressed by the code itself** - for example a copyright notice, notes about the design, references to requirements

The 3 types of acceptable comments are highlighted above. Effective commenting shouldn't be time-consuming. Guidelines for effective commenting:

- if the commenting style is too fancy, it very likely becomes annoying to maintain
- write pseudocode in comments
- performance is not a good reason for avoiding commenting (in some languages comments slow down execution / compilation) - the usual solution is to pass the code through a tool stripping comments before release

End-line comments pose several problems and should be avoided - it is hard to write a meaningful comment in one line, and there is not much space on the right side of the screen.

The code itself is always the first documentation you should check. If the code is not good enough, look for comments. Comments should avoid abbreviations. Comments should justify violations of good programming style. Don't comment tricky code - rewrite it. If something is tricky for you, for others it might be incomprehensible.

> Make your code so good that you don't need comments, and then comment it to make it even better.

Commenting data declarations:

- comment the units
- comment the range of allowable numeric values
- use enumerated types to express coded meanings
- comment limitations on input data, use assertions
- if a variable is used as a bit field, explain every bit
- if you have comments that refer to a specific variable, make sure the comment stays updated after a variable name change

Keep comments close to the code they describe. Describe the design approaches, limitations, usage assumptions and so on. Do not document implementation details in the interface.

## Chapter 33: Personal Character

The best programmers are the people who realise how small their brains are.
The purpose of many good programming practices is to reduce the load on your grey cells:

- decomposing - makes a system simpler to understand
- reviews, inspections and tests - our intellectual capacity is limited, so we augment it with someone else's
- short routines reduce the load on our brains
- writing programs in terms of the problem domain rather than in terms of low-level implementation details reduces mental workload
- conventions free the brain from the relatively mundane aspects of programming

How to exercise curiosity and make learning a priority?

- If your workload consists entirely of short-term assignments that don't develop your skills, be dissatisfied. Half of what you need to know will be outdated in three years. You are not learning, you are turning into a dinosaur. If you can't learn at your job, find a new one.
- Experiment if you don't understand something. Make mistakes and learn from each one. Making a mistake is no sin. Failing to learn from a mistake is.
- Read about problem-solving, don't reinvent the wheel.
- Study the work of great programmers - it is not about reading 500-line source files but, for example, about studying high-level design.
- Read books - one book is more than most programmers read each year.
- Affiliate with other professionals.
- Set up a professional development plan.

Mature programmers are honest, which means: you refuse to pretend you are an expert when you are not, you admit your mistakes, you provide realistic estimates, you understand your program.

Writing readable code is part of being a team player. As a readability guideline, keep the person who has to modify your code in mind. Programming is communicating with another programmer first and communicating with the computer second.

To stay valuable, you have to stay current. For young, hungry programmers, this is an advantage. Older programmers sometimes feel they have already earned their stripes and resent having to improve themselves year after year.

Good habits matter because most of what you do as a programmer you do without consciously thinking about it.

## Chapter 34: Themes in Software Craftsmanship

There are many intellectual tools for handling computer-science complexity:

- dividing a system into subsystems at the architecture level, so that the brain can focus on a smaller part of the system at one time
- careful interface definition - preserving the abstraction represented by the interface, so that the brain doesn't have to remember arbitrary details
- avoid global data
- avoid deep inheritance hierarchies
- carefully define the error-handling strategy
- prevent the creation of monster classes
- keep functions short
- use self-explanatory names
- minimise the number of parameters passed to a routine
- use conventions

The points above decrease the amount of mental resources you need in order to understand the code. Abstraction is a particularly powerful tool for managing complexity. Fred Brooks said that the biggest single gain ever made in computer science was the jump from machine language to higher-level languages - it freed programmers from worrying about the detailed quirks of individual pieces of hardware and allowed them to focus on programming. Reducing complexity is arguably the most important key to being an effective programmer.

Collective ability isn't simply the sum of the team members' individual skills. The way people work together determines whether abilities sum up or subtract from each other.
In the real world, requirements are never stable. In order to build software more flexibly, use an incremental approach - plan to develop the program in several iterations.

Write readable code because it helps other people read the code. The computer doesn't care if code is readable. A professional programmer writes readable code. Even if you think you are the only one who will read your code, in reality chances are good that someone else will need to modify it. One study found that 10 generations of maintenance programmers work on an average program before it gets rewritten.

If your language doesn't support some mechanism (e.g. a missing `assert`), do not hesitate to implement it on your own.

At the highest level, you shouldn't have any idea how the data is stored. Suggested levels of abstraction:

4. high-level problem-domain terms
3. low-level problem-domain terms
2. low-level implementation structures
1. programming-language structures and tools
0. operating-system operations and machine instructions

================================================
FILE: books/comic-agile.md
================================================

[go back](https://github.com/pkardas/learning)

# Comic Agilé

Book by Luxshan Ratnaravi, Mikkel Noe-Nygaard

- [1: Transformation](#1-transformation)
- [4: Team](#4-team)
- [6: Miscellaneous](#6-miscellaneous)

## 1: Transformation

Instead of taking a waterfall approach to your agile transformation, take an iterative one and grow the scope organically. Focus on changing the organizational culture to align with an agile one.

Product Owners don't dictate anything just because they are accountable for maximizing value through an effective product backlog. The entire Scrum Team collaborates on creating a plan for the next Sprint.

Assess the psychological safety in your organization. If it is too low, seek to make working agreements where blameless post-mortems are part of them, so you can create a culture of promoting healthy conflicts and celebration of mistakes (and learning from them). Help your managers in demanding more psychological safety from their superiors, as that is a prerequisite for the managers creating it for you.

If you only partly adopted the agile way of working, the scope and time might be fixed, so the only parameter the teams can really vary is how much technical debt to create.

## 4: Team

Team Velocity - the velocity is only for the team. If management doesn't get that, educate them on the purpose and nature of velocity.

Technical Debt - if your PO doesn't get the importance of reducing technical debt, you need to educate them - spending some time now on reducing technical debt will most likely decrease the time-to-market of new features.

Avoid external participants in the team's retrospective (lack of trust towards externals).

Use simple tools for building an agile culture; by taking a just-enough approach to your tooling, you free up energy to focus on the needed behavioral changes.

DevOps is not just about tools, testing and CI/CD pipelines - it is more about culture, breaking down silos and aligning cross-functional teams to the paths of value delivery.

A WIP limit should create a pull system in the team's flow. This should then bring a conversation about collaboration and the knowledge sharing needed to ensure that the entire team can actually swarm around each PBI (Product Backlog Item).

Mob Programming - working collaboratively in groups of 3+ to deliver high-quality software and/or share knowledge between the developers in the mob.
The Driver controls the keyboard; the Navigators are thinking, discussing, reviewing and reflecting. The roles are interchanged.

Stability is the foundation for building the trust needed to become high-performing teams. If a team keeps changing, they will have difficulties moving up Tuckman's phases - forming, storming, norming, performing.

## 6: Miscellaneous

In the spirit of openness, you don't have to wait for the Retrospective to bring up potential improvements to your ways of working.

Companies with diverse leadership are 45% more likely to grow their market share and 70% more likely to capture new markets compared to companies with "non-diverse" leadership. Behavioral diversity is the other half of the equation, which includes:

- ensuring everyone is heard
- making it safe to propose novel ideas
- giving team members decision-making authority
- sharing credit for success
- giving actionable feedback
- implementing feedback from the team

================================================
FILE: books/cracking-coding-interview/Dockerfile
================================================

FROM python:3.10.4

WORKDIR /src

ENV PYTHONPATH "${PYTHONPATH}:/src"

COPY requirements.txt .
RUN pip install -r requirements.txt

COPY src/ src/

================================================
FILE: books/cracking-coding-interview/docker-compose.yml
================================================

version: "3.9"

services:
  interview:
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      - ./:/src

================================================
FILE: books/cracking-coding-interview/notes.md
================================================

[go back](https://github.com/pkardas/learning)

# Cracking the Coding Interview: 189 Programming Questions and Solutions

Book by Gayle Laakmann McDowell

Code here: [click](.)

- [Chapter 1: The Interview Process](#chapter-1-the-interview-process)
- [Chapter 2: Behind the Scenes](#chapter-2-behind-the-scenes)
- [Chapter 3: Special situations](#chapter-3-special-situations)
- [Chapter 4: Before the Interview](#chapter-4-before-the-interview)
- [Chapter 5: Behavioral Questions](#chapter-5-behavioral-questions)

## Chapter 1: The Interview Process

Assessment of a candidate's performance:

- Analytical skills: Did you need much help to solve the problem? How optimal was your solution? How long did it take you to arrive at a solution?
- Coding skills: Were you able to successfully translate your algorithm to reasonable code? Was it clean and well-organized? Did you think of potential errors? Did you use good style?
- Technical knowledge: Do you have a strong foundation in computer science and the relevant technologies?
- Experience: Have you made good technical decisions in the past? Have you built interesting, challenging projects? Have you shown drive, initiative, and other important factors?
- Culture fit: Do your personality and values fit with the company and team? Did you communicate well with your interviewer?

False negatives are acceptable. Some good candidates are rejected. The company is out to build a great set of employees. They can accept that they miss out on some good people. The company is far more concerned with false positives: people who do well in an interview but are not in fact very good.

Basic data structure and algorithm knowledge is useful. It is a good proxy. These skills are not hard to learn, but are well-correlated with being a good developer. Also, it is hard to ask problem-solving questions that don't involve algorithms and data structures.
Your interviewer develops a feel for your performance by comparing you to other people. Getting a hard question isn't a bad thing. When it is harder for you, it is harder for everyone.

If you haven't heard back from a company within 3-5 business days after the interview, check in with your recruiter. You can almost always re-apply to a company after getting rejected. Typically, you have to wait 6-12 months.

## Chapter 2: Behind the Scenes

The "bar raiser" interviewer is charged with keeping the interview bar high. This person has significant experience with interviews and veto power in the hiring decision.

## Chapter 3: Special situations

**Experienced candidates.** More experienced engineers might see slightly less focus on algorithm questions. Some interviewers might hold experienced candidates to a somewhat lower standard - after all, it has been years since these candidates took an algorithms class. Others, though, hold experienced candidates to a higher standard. On average, it balances out. The exception to this rule is system design and architecture questions, where performance is evaluated with respect to your experience level.

Personality fit: typically assessed by how you interact with your interviewer. Establishing a friendly, engaging conversation with your interviewers is your ticket to many job offers.

**For interviewers.**

- Don't actually ask the exact questions in here (this book). You can ask questions similar to these. Some candidates are reading this book. Your goal is to test their problem-solving skills, not their memorization skills.
- Ask Medium and Hard problems. When you ask questions that are too easy, performance gets clustered together.
- Use hard questions, not hard knowledge. If your question expects obscure knowledge, ask yourself: is this truly an important skill? Most won't remember Dijkstra's algorithm or the specifics of how AVL trees work.
- Avoid "scary" questions. Some questions intimidate candidates because they seem to involve specialized knowledge, even if they really don't - math or probability, low-level knowledge, system design or scalability, proprietary systems (e.g. Google Maps). If you are going to ask a question that sounds "scary", make sure you reassure candidates that it doesn't require the knowledge they think it does.
- Offer positive reinforcement. You want candidates to feel comfortable. A candidate who is nervous will perform poorly, and it doesn't mean that they aren't good. Moreover, a good candidate who has a negative reaction to you or to the company is less likely to accept an offer - and they may dissuade their friends from interviewing/accepting as well. No matter how poorly a candidate is doing, there is always something they got right. Find a way to infuse some positivity into the interview.
- Coach your candidates.
  - Many candidates don't use an example to solve a question. Guide them.
  - Some candidates take a long time to find a bug because they use an enormous example. They didn't realize it would be more efficient to analyze their code conceptually first, or that a small example would work nearly as well. Guide them.
  - If they dive into code before they have an optimal solution, pull them back and focus them on the algorithm.
  - If they get nervous and stuck and aren't sure where to go, suggest that they walk through the brute-force solution and look for areas to optimize.
  - Remind them that they can start off with a brute-force solution. Their first solution doesn't have to be perfect.
  - If they want silence, give them silence. If your candidate needs this, give your candidate time to think.
- Know your mode: sanity check, quality, specialist, and proxy.
  - Sanity Check - easy problem-solving or design questions. They assess a minimum degree of competence. You can use them early in the process.
  - Quality Check - more challenging questions, designed to be more rigorous and make a candidate think.
  - Specialist Questions - test knowledge on specific topics, e.g. Java or machine learning.
  - Proxy Knowledge - knowledge that is not quite at the specialist level, but that you would expect a candidate at their level to know.

## Chapter 4: Before the Interview

If you are smart, you can code, and you can prove that, you can land your interview. Resume screeners want to know that you are smart and that you can code. You should prepare your resume to highlight these 2 things. Think twice before cutting technical lines in order to make space for your non-technical hobbies.

Keep your resume short, max. 1.5-2 pages. Long resumes are not a reflection of having tons of experience; they are a reflection of not understanding how to prioritize content. A resume should not include a full history of every role you have ever had. Include only the relevant positions - the ones that make you a more impressive candidate.

For each role, try to discuss your accomplishments with the following approach: "_Accomplished X by implementing Y, which led to Z_". Not everything will fit into this approach, but the principle is the same: what you did, how you did it, and what the results were.

## Chapter 5: Behavioral Questions

Ensure that you have one to three projects that you can talk about in detail. You should be able to discuss the technical components in depth. These should be projects where you played a central role.

What are your weaknesses? A good answer conveys a real, legitimate weakness but emphasises how you work to overcome it.

What questions should you ask the interviewer?

- Genuine Questions: the questions you actually want to know the answer to.
- Insightful Questions: questions that demonstrate your knowledge or understanding of technology. These typically require advance research about the company.
- Passion Questions: questions designed to demonstrate your passion for technology. They show that you are interested in learning and will be a strong contributor to the company. E.g.: "I am very interested in scalability, and I would love to learn more about it. What opportunities are there at this company to learn about this?"

Be specific, not arrogant. How do you make yourself sound good without being arrogant? Be specific. Specificity means giving just the facts and letting the interviewer derive an interpretation. Stay light on details and just state the key points. Your interviewer can ask for more details.

Focus on yourself, not your team. More "I", less "we".

Give structured answers:

1. Nugget first - start your response with a "nugget" that succinctly describes what your response will be about.
2. S.A.R. (Situation, Action, Result) - start off outlining the situation, then explain the actions you took, and lastly describe the result.

Tell me about yourself - suggested structure:

1. Current role (headline only)
2. College
3. Post college & onwards (job, technologies)
4. Current role (more details)
5. Outside of work (hobbies)
Wrap up (what are you looking for) ================================================ FILE: books/cracking-coding-interview/requirements.txt ================================================ pytest==7.1.2 ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/check_permutation.py ================================================ import pytest def check_permutation_sets(string: str, potential_permutation_string: str) -> bool: return len(string) == len(potential_permutation_string) and set(string) == set(potential_permutation_string) def check_permutation_sort(string: str, potential_permutation_string: str) -> bool: return sorted(string) == sorted(potential_permutation_string) def check_permutation_array(string: str, potential_permutation_string: str) -> bool: if len(string) != len(potential_permutation_string): return False url_array = [0] * 128 for ch in string: url_array[ord(ch)] += 1 for ch in potential_permutation_string: url_array[ord(ch)] -= 1 if url_array[ord(ch)] < 0: return False return True @pytest.mark.parametrize("string, potential_permutation_string, is_permutation", [ # @formatter:off ("god", "dog", True), ("god", "dod", False), ("god", "dogg", False), ("cat belongs to ala", "ala belongs to cat", True), ("interview questions", "interviews question", True), ("interview questions", "interview question", False), # @formatter:on ]) @pytest.mark.parametrize("function", [ check_permutation_sets, check_permutation_sort, check_permutation_array, ]) def test_algorithm(function, string, potential_permutation_string, is_permutation): assert function(string, potential_permutation_string) == is_permutation ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/is_unique.py ================================================ import pytest def check_if_has_unique_characters_pythonic(string: str) -> bool: return len(set(string)) == len(string) def check_if_has_unique_characters_ascii(string: str) -> bool: boolean_array = [False] * 128 for ch in string: int_ch = ord(ch) if boolean_array[int_ch]: return False boolean_array[int_ch] = True return True def check_if_has_unique_characters_no_structures(string: str) -> bool: for i, ch_0 in enumerate(string): for ch_1 in string[i + 1:]: if ch_0 == ch_1: return False return True def check_if_has_unique_characters_no_structures_sort(string: str) -> bool: sorted_string = sorted(string) for i in range(len(sorted_string) - 1): if sorted_string[i] == sorted_string[i + 1]: return False return True @pytest.mark.parametrize("string, has_all_unique_chars", [ # @formatter:off ("qwerty", True), ("", True), ("qqwert", False), ("qwertt", False), # @formatter:on ]) @pytest.mark.parametrize("function", [ check_if_has_unique_characters_pythonic, check_if_has_unique_characters_ascii, check_if_has_unique_characters_no_structures, check_if_has_unique_characters_no_structures_sort, ]) def test_algorithm(function, string, has_all_unique_chars): assert function(string) == has_all_unique_chars ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/one_away.py ================================================ import pytest def is_one_edit_away_pythonic(string: str, edit: str) -> bool: if abs(len(string) - len(edit)) > 1: return False if string in edit or edit in string: return True return len(set(string) - set(edit)) <= 1 def is_one_edit_away_loop(string: str, edit: str) -> bool: if abs(len(string) - 
len(edit)) > 1: return False shorter_text, longer_text = string if len(string) < len(edit) else edit, string if len(string) >= len(edit) else edit shorter_i, longer_i = 0, -1 edit_found = False while shorter_i < len(shorter_text) and longer_i < len(longer_text): longer_i += 1 if shorter_text[shorter_i] == longer_text[longer_i]: shorter_i += 1 continue if edit_found: return False if len(string) == len(edit): shorter_i += 1 edit_found = True return True @pytest.mark.parametrize("string, edit, expected_result", [ # @formatter:off ("pale", "ple", True), ("pale", "ale", True), ("ale", "pale", True), ("pales", "pale", True), ("pale", "bale", True), ("pale", "bake", False), ("pale", "ba", False), # @formatter:on ]) @pytest.mark.parametrize("function", [ is_one_edit_away_pythonic, is_one_edit_away_loop ]) def test_algorithm(function, string, edit, expected_result): assert function(string, edit) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/palindrome_permutation.py ================================================ from collections import Counter import pytest def is_palindrome_permutation_pythonic(string: str) -> bool: raw_string = string.replace(' ', '') letter_frequency = Counter(raw_string) if len(raw_string) % 2 == 0: return all(frequency % 2 == 0 for frequency in letter_frequency.values()) else: return sum(1 for frequency in letter_frequency.values() if frequency == 1) <= 1 def is_palindrome_permutation_counter(string: str) -> bool: raw_string = string.replace(' ', '') letter_frequency = Counter() num_of_odd = 0 for ch in raw_string: letter_frequency[ch] += 1 if letter_frequency[ch] % 2 == 1: num_of_odd += 1 else: num_of_odd -= 1 return num_of_odd <= 1 @pytest.mark.parametrize("string, expected_result", [ # @formatter:off ("tact coa", True), ("kamil slimak", True), ("slimakkamil ", True), ("aaaaaab", True), ("aaa", True), ("aaaaacb", False), ("abc", False), ("slimakoamil ", False), # @formatter:on ]) @pytest.mark.parametrize("function", [ is_palindrome_permutation_pythonic, is_palindrome_permutation_counter ]) def test_algorithm(function, string, expected_result): assert function(string) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/rotate_matrix.py ================================================ from typing import List import pytest def rotate_matrix_list_comprehension(matrix: List[List[int]]) -> List[List[int]]: size = len(matrix) return [ [matrix[col][row] for col in reversed(range(size))] for row in range(size) ] def rotate_matrix_zip(matrix: List[List[int]]) -> List[List[int]]: return [list(reversed(row)) for row in zip(*matrix)] @pytest.mark.parametrize("matrix, rotated_matrix", [ ([[1, 2], [3, 4]], [[3, 1], [4, 2]]), ([[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[7, 4, 1], [8, 5, 2], [9, 6, 3]]), ([[1, 2, 3, 8], [4, 5, 6, 8], [7, 8, 9, 8], [8, 8, 8, 8]], [[8, 7, 4, 1], [8, 8, 5, 2], [8, 9, 6, 3], [8, 8, 8, 8]]) ]) @pytest.mark.parametrize("function", [ rotate_matrix_zip, rotate_matrix_list_comprehension ]) def test_algorithm(function, matrix, rotated_matrix): assert function(matrix) == rotated_matrix ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/string_compression.py ================================================ from dataclasses import dataclass import pytest def compress_string(text: str) -> str: @dataclass class Compressed: char: str freq: int 
compressed = [] for ch in text: if compressed and ch == compressed[-1].char: compressed[-1].freq += 1 else: compressed.append(Compressed(char=ch, freq=1)) return ''.join(f"{c.char}{c.freq}" for c in compressed) if len(compressed) * 2 < len(text) else text @pytest.mark.parametrize("text, expected_result", [ # @formatter:off ("a", "a"), ("aabb", "aabb"), ("aaaa", "a4"), ("aabbb", "a2b3"), ("aabbbaa", "a2b3a2"), # @formatter:on ]) def test_algorithm(text, expected_result): assert compress_string(text) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/string_rotation.py ================================================ import pytest def is_rotated(string: str, rotated_string: str) -> bool: return len(string) == len(rotated_string) and rotated_string in string * 2 @pytest.mark.parametrize("string, rotated_string, expected_result", [ # @formatter:off ("", "", True), ("waterbottle", "erbottlewat", True), ("dog", "gdo", True), ("dog", "dogdo", False), ("dog", "godd", False), ("dog", "go", False), # @formatter:on ]) def test_algorithm(string, rotated_string, expected_result): assert is_rotated(string, rotated_string) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/urlify.py ================================================ import pytest def urlify_pythonic(url: str) -> str: return ' '.join(url.split()).replace(' ', "%20") def urlify_array(url: str) -> str: result_url = "" last_appended_character = None for ch in url: if ch == ' ' and last_appended_character is None: # Do not duplicate '%20' in the URL continue elif ch == ' ' and last_appended_character: last_appended_character = None result_url += "%20" else: last_appended_character = ch result_url += ch if last_appended_character is None: return result_url[:-3] return result_url @pytest.mark.parametrize("url, expected_url", [ # @formatter:off ("Mr John Smith", "Mr%20John%20Smith"), ("Mr John Smith", "Mr%20John%20Smith"), (" Mr John Smith", "Mr%20John%20Smith"), ("Mr John Smith ", "Mr%20John%20Smith"), ("Mr ", "Mr"), ("M ", "M"), (" ", ""), ("", ""), # @formatter:on ]) @pytest.mark.parametrize("function", [ urlify_pythonic, urlify_array, ]) def test_algorithm(function, url, expected_url): assert function(url) == expected_url ================================================ FILE: books/cracking-coding-interview/src/ch01_arrays_and_strings/zero_matrix.py ================================================ from typing import List import pytest def nullify_loop(matrix: List[List[int]]) -> List[List[int]]: height, width = len(matrix), len(matrix[0]) columns, rows = set(), set() for row in range(height): for col in range(width): if matrix[row][col] == 0: columns.add(col) rows.add(row) return [ [ 0 if row in rows or col in columns else matrix[row][col] for col in range(width) ] for row in range(height) ] def nullify_in_place(matrix: List[List[int]]) -> List[List[int]]: height, width = len(matrix), len(matrix[0]) def nullify_column(pos: int) -> None: for i in range(height): matrix[i][pos] = 0 def nullify_row(pos: int) -> None: matrix[pos] = [0] * width col_start = 0 for row in range(height): for col in range(col_start, width): if matrix[row][col] == 0: nullify_row(row) nullify_column(col) col_start = col + 1 break return matrix @pytest.mark.parametrize("matrix, rotated_matrix", [ ([[0, 2], [3, 4]], [[0, 0], [0, 4]]), ([[1, 2, 3, 4], [1, 0, 3, 4], [1, 2, 3, 0]], [[1, 0, 3, 0], [0, 0, 0, 0], [0, 
0, 0, 0]]) ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/delete_middle_node.py ================================================ import pytest from linked_list import ( LinkedList, Node, ) def delete_middle_node(node: Node) -> None: assert node.next, "node is not the last node in the linked list" node.data = node.next.data node.next = node.next.next @pytest.mark.parametrize("values, node, expected_result", [ # @formatter:off ([1, 2, 3, 4], 2, [1, 3, 4]), ([1, 2, 3, 4], 3, [1, 2, 4]), # @formatter:on ]) def test_algorithm(values, node, expected_result): linked_list = LinkedList(values) delete_middle_node(linked_list.node_for_value(node)) assert linked_list.values == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/intersection.py ================================================ from typing import Optional import pytest from linked_list import ( LinkedList, Node, ) def intersection(list_0: LinkedList, list_1: LinkedList) -> Optional[Node]: if list_0.tail != list_1.tail: return None l0_node, l1_node = list_0.head, list_1.head l0_len, l1_len = list_0.length, list_1.length # Advance pointers when lists have different size: if l0_len > l1_len: for i in range(l0_len - l1_len): l0_node = l0_node.next if l0_len < l1_len: for i in range(l1_len - l0_len): l1_node = l1_node.next while l0_node and l1_node: if l0_node == l1_node: return l0_node l0_node = l0_node.next l1_node = l1_node.next assert False, "Loop above must finish the program" l0 = LinkedList([3, 1, 5, 9]) l1 = LinkedList([4, 6]) tail = LinkedList([7, 2, 1]).head l4 = LinkedList([3, 1, 5, 9, 7, 2, 1]) l5 = LinkedList([4, 6, 7, 2, 1]) @pytest.mark.parametrize("list_0, list_0_tail, list_1, list_1_tail, expected_result", [ # @formatter:off (l0, tail, l1, tail, tail), (l4, None, l5, None, None) # @formatter:on ]) def test_algorithm(list_0, list_0_tail, list_1, list_1_tail, expected_result): list_0.tail.next = list_0_tail list_1.tail.next = list_1_tail assert intersection(list_0, list_1) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/linked_list.py ================================================ from typing import ( List, Optional, ) import pytest class Node: def __init__(self, data: int) -> None: self.next = None self.data = data class LinkedList: def __init__(self, data: List[int]) -> None: self.head = None for val in data: self.append(val) @property def values(self) -> List[int]: result, current = [], self.head while current: result.append(current.data) current = current.next return result @property def tail(self) -> Optional[Node]: node = self.head while node and node.next: node = node.next return node @property def length(self) -> int: return len(self.values) def node_for_value(self, val: int) -> Optional[Node]: node = self.head while node: if node.data == val: return node node = node.next return None def append(self, data: int) -> None: self.head = append(self.head, data) def delete(self, data: int) -> None: self.head = delete(self.head, data) def delete(head: Optional[Node], data: int) -> Optional[Node]: node = head if not node: return None if head.data == data: return head.next while node.next: if node.next.data == data: node.next = node.next.next break
node = node.next return head def append(head: Optional[Node], data: int) -> Optional[Node]: if not head: return Node(data) current, end = head, Node(data) while current.next: current = current.next current.next = end return head @pytest.mark.parametrize("values", [ [], [1], [1, 2], [1, 2, 3], ]) def test_append(values): assert LinkedList(values).values == values @pytest.mark.parametrize("values, to_delete, expected_result", [ # @formatter:off ([], 0, []), ([1], 0, [1]), ([1], 1, []), ([1, 2], 1, [2]), ([1, 2], 2, [1]), ([1, 2, 3], 2, [1, 3]), # @formatter:on ]) def test_delete(values, to_delete, expected_result): linked_list = LinkedList(values) linked_list.delete(to_delete) assert linked_list.values == expected_result @pytest.mark.parametrize("values, value, expected_node_val", [ # @formatter:off ([1, 2, 3, 4], 2, 2), ([1, 2, 3, 4], 5, None) # @formatter:on ]) def test_node_for_value(values, value, expected_node_val): node = LinkedList(values).node_for_value(value) assert node.data if node else node == expected_node_val @pytest.mark.parametrize("values, expected_tail", [ # @formatter:off ([], None), ([1], 1), ([1, 2], 2), # @formatter:on ]) def test_tail(values, expected_tail): tail = LinkedList(values).tail assert tail.data == expected_tail if expected_tail else tail is None ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/loop_detection.py ================================================ from typing import Optional import pytest from linked_list import ( LinkedList, Node, ) def get_loop(linked_list: LinkedList) -> Optional[Node]: slow, fast = linked_list.head, linked_list.head def get_loop_head(): nonlocal slow, fast slow = linked_list.head while slow != fast: slow = slow.next fast = fast.next return fast while fast and fast.next: slow = slow.next fast = fast.next.next if slow == fast: return get_loop_head() return None l0 = LinkedList([1, 2, 3, 4, 5]) l0.node_for_value(5).next = l0.node_for_value(3) l1 = LinkedList([1, 2, 3, 4, 5]) @pytest.mark.parametrize("linked_list, expected_result", [ # @formatter:off (l0, l0.node_for_value(3)), (l1, None), # @formatter:on ]) def test_algorithm(linked_list, expected_result): assert get_loop(linked_list) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/palindrome.py ================================================ import pytest from linked_list import ( LinkedList, Node, ) def is_palindrome_simple(linked_list: LinkedList) -> bool: values = linked_list.values return values == values[::-1] def is_palindrome_reverse(linked_list: LinkedList) -> bool: def reverse_list() -> Node: head, node = None, linked_list.head while node: new_node = Node(data=node.data) new_node.next = head head = new_node node = node.next return head normal_node = linked_list.head reversed_node = reverse_list() while normal_node and reversed_node: if normal_node.data != reversed_node.data: return False normal_node = normal_node.next reversed_node = reversed_node.next return not normal_node and not reversed_node def is_palindrome_slow_fast_runner(linked_list: LinkedList) -> bool: slow, fast = linked_list.head, linked_list.head stack = [] while fast and fast.next: stack.append(slow.data) slow = slow.next fast = fast.next.next if fast: slow = slow.next while slow: if stack and stack.pop() != slow.data: return False slow = slow.next return True @pytest.mark.parametrize("values, expected_result", [ # @formatter:off ([1, 2, 3, 4], False), ([1, 2, 2, 
2], False), ([1, 2, 2, 1], True), ([1, 2, 1], True), ([1], True), ([], True) # @formatter:on ]) @pytest.mark.parametrize("function", [ is_palindrome_simple, is_palindrome_reverse, is_palindrome_slow_fast_runner, ]) def test_algorithm(function, values, expected_result): linked_list = LinkedList(values) assert function(linked_list) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/partition.py ================================================ from typing import Tuple import pytest from linked_list import LinkedList def partition(linked_list: LinkedList, partition_val: int) -> Tuple[LinkedList, LinkedList]: l1, l2 = LinkedList(data=[]), LinkedList(data=[]) node = linked_list.head while node: if node.data < partition_val: l1.append(node.data) else: l2.append(node.data) node = node.next return l1, l2 @pytest.mark.parametrize("values, partition_val, expected_values", [ # @formatter:off ([1, 2, 3, 4, 5], 3, ([1, 2], [3, 4, 5])), ([1, 2, 3, 4, 5], 0, ([], [1, 2, 3, 4, 5])), ([1, 2, 3, 4, 5], 6, ([1, 2, 3, 4, 5], [])), # @formatter:on ]) def test_algorithm(values, partition_val, expected_values): linked_list = LinkedList(values) l1, l2 = partition(linked_list, partition_val) assert (l1.values, l2.values) == expected_values ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/remove_dups.py ================================================ import pytest from linked_list import LinkedList def remove_duplicates_buffer(linked_list: LinkedList) -> LinkedList: unique_data = set() prev, current = None, linked_list.head while current: if current.data in unique_data: prev.next = current.next else: unique_data.add(current.data) prev = current current = current.next return linked_list def remove_duplicates_no_buffer(linked_list: LinkedList) -> LinkedList: current = linked_list.head while current: runner = current while runner.next: if current.data == runner.next.data: runner.next = runner.next.next else: runner = runner.next current = current.next return linked_list @pytest.mark.parametrize("values, expected_result", [ # @formatter:off ([], []), ([1, 1], [1]), ([1, 1, 0], [1, 0]), ([1, 1, 1, 1], [1]), ([0, 1, 0, 1], [0, 1]), ([1, 2, 3, 4], [1, 2, 3, 4]), # @formatter:on ]) @pytest.mark.parametrize("function", [ remove_duplicates_buffer, remove_duplicates_no_buffer ]) def test_algorithm(function, values, expected_result): linked_list = LinkedList(values) assert function(linked_list).values == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/return_kth_to_last.py ================================================ from typing import Optional import pytest from linked_list import ( LinkedList, Node, ) def return_kth_to_last_simple(linked_list: LinkedList, k: int) -> int: node = linked_list.head position, i = len(linked_list.values) - k, 0 if position < 0: return -1 while node and i < position: node = node.next i += 1 return node.data def return_kth_to_last_simplest(linked_list: LinkedList, k: int) -> int: values = linked_list.values size = len(values) return values[size - k] if size - k >= 0 else -1 def return_kth_to_last_recursive(linked_list: LinkedList, k: int) -> int: found_value = None def _return_kth_to_last(node: Optional[Node]) -> int: if not node: return 0 index = _return_kth_to_last(node.next) + 1 if index == k: nonlocal found_value found_value = node.data return index 
_return_kth_to_last(linked_list.head) return found_value if found_value else -1 def return_kth_to_last_iterative(linked_list: LinkedList, k: int) -> int: p1, p2 = linked_list.head, linked_list.head for _ in range(k): if not p1: return -1 p1 = p1.next while p1: p1 = p1.next p2 = p2.next return p2.data @pytest.mark.parametrize("values, k, expected_result", [ # @formatter:off ([1, 2, 3], 1, 3), ([1, 2, 3], 2, 2), ([1, 2, 3], 3, 1), ([1, 2, 3], 4, -1), # @formatter:on ]) @pytest.mark.parametrize("function", [ return_kth_to_last_simple, return_kth_to_last_simplest, return_kth_to_last_recursive, return_kth_to_last_iterative, ]) def test_algorithm(function, values, k, expected_result): linked_list = LinkedList(values) assert function(linked_list, k) == expected_result ================================================ FILE: books/cracking-coding-interview/src/ch02_linked_lists/sum_lists.py ================================================ import pytest from linked_list import ( LinkedList, Node, ) def sum_lists(list_0: LinkedList, list_1: LinkedList) -> LinkedList: result, remainder = [], 0 node_0, node_1 = list_0.head, list_1.head def add_aligned_lists() -> None: nonlocal node_0, node_1, result, remainder while node_0 and node_1: result.append((node_0.data + node_1.data + remainder) % 10) remainder = 1 if (node_0.data + node_1.data + remainder) >= 10 else 0 node_0, node_1 = node_0.next, node_1.next def align_remaining_list(node: Node) -> None: nonlocal result, remainder while node: result.append((node.data + remainder) % 10) remainder = 1 if (node.data + remainder) >= 10 else 0 node = node.next add_aligned_lists() align_remaining_list(node_0) align_remaining_list(node_1) if remainder: result.append(remainder) return LinkedList(result) @pytest.mark.parametrize("list_0, list_1, expected_result", [ # @formatter:off ([7, 1, 6], [5, 9, 2], [2, 1, 9]), ([1, 7, 1], [3], [4, 7, 1]), ([9, 9, 9], [1], [0, 0, 0, 1]), ([7, 1], [3, 1], [0, 3]), ([7, 1], [3], [0, 2]), # @formatter:on ]) def test_algorithm(list_0, list_1, expected_result): list_0, list_1 = LinkedList(list_0), LinkedList(list_1) assert sum_lists(list_0, list_1).values == expected_result ================================================ FILE: books/ddd.md ================================================ [go back](https://github.com/pkardas/learning) # Domain-Driven Design: Tackling Complexity in the Heart of Software Book by Eric Evans - [Chapter 1: Crunching Knowledge](#chapter-1-crunching-knowledge) - [Chapter 2: Communication and the Use of Language](#chapter-2-communication-and-the-use-of-language) - [Chapter 3: Binding Model and Implementation](#chapter-3-binding-model-and-implementation) - [Chapter 4: Isolating the Domain](#chapter-4-isolating-the-domain) - [Chapter 5: A Model Expressed in Software](#chapter-5-a-model-expressed-in-software) - [Chapter 6: The Life Cycle of a Domain Object](#chapter-6-the-life-cycle-of-a-domain-object) - [Chapter 7: Using the Language: An Extended Example](#chapter-7-using-the-language-an-extended-example) - [Chapter 8: Breakthrough](#chapter-8-breakthrough) - [Chapter 9: Making Implicit Concepts Explicit](#chapter-9-making-implicit-concepts-explicit) - [Chapter 10: Supple Design](#chapter-10-supple-design) - [Chapter 11: Applying Analysis Patterns](#chapter-11-applying-analysis-patterns) - [Chapter 12: Relating Design Patterns to the Model](#chapter-12-relating-design-patterns-to-the-model) - [Chapter 13: Refactoring Toward Deeper Insight](#chapter-13-refactoring-toward-deeper-insight) - [Chapter 14: Managing 
Model Integrity](#chapter-14-managing-model-integrity) - [Chapter 15: Distillation](#chapter-15-distillation) - [Chapter 16: Large-Scale Structure](#chapter-16-large-scale-structure) ## Chapter 1: Crunching Knowledge Effective modeling: - Binding model and the implementation - Cultivating a language based on the model - Developing a knowledge-rich model - Distilling the model - drop unneeded concepts - Brainstorming and experimenting Effective domain modellers are knowledge crunchers (they take a torrent of information and probe for the relevant trickle). Knowledge crunching is collaborative work, typically led by developers in cooperation with domain experts. Early versions or prototypes feed experience back into the team and change interpretations. All projects lack knowledge - people leave, team reorganisations happen - in general, knowledge is lost. Highly productive teams grow their knowledge continuously - they improve technical knowledge along with general domain-modelling skills, but also seriously learn about the specific domain they are working on. The accumulated knowledge makes them effective knowledge crunchers. Software is unable to fill in gaps with common sense - that is why knowledge crunching is important. Example with an overbooking strategy: the overbooking check should be extracted from the booking functionality to be more explicit and visible. This is an example of domain modeling, and of securing and sharing knowledge. ## Chapter 2: Communication and the Use of Language Domain experts and developers use different languages. Experts vaguely describe what they want, developers vaguely understand. The cost of translation, plus the risk of misunderstanding, is too high. A project needs a common language. The ubiquitous language includes: names of classes and prominent operations, terms to discuss. Model-based language should be used to describe artefacts, tasks and functionalities. The language may change to fit the discussion better. These changes will lead to refactoring of the code. A change in the language is a change to the model. The domain-model-based terminology makes conversations more concise, you avoid talking about low-level implementation details, instead you use high-level concepts (like in the example: Itinerary, Routing Service, Route Specification instead of cargo id, origin and destination, ...). Play with the model as you talk about the system, find easier ways to say what you need to say, and take those new ideas back down to the diagrams and code. The team should use ONE and only ONE language. Almost every conversation is an opportunity for the developers and domain experts to play with the model, deepen understanding and fine-tune it. The domain model is something between business terms developers don't understand and technical aspects of the design. The vital detail about the design is captured in the code. A well-written implementation should be transparent and reveal the model underlying it. The model is not the diagram, diagrams help to communicate and explain the model. Extreme Programming advocates using no extra design documents at all (usually because they fall out of sync) - the code should speak for itself. This motivates developers to keep code clean and transparent. However, if a document exists, it should not try to do what code already does well - a document should illuminate meaning, give insight into large-scale structures, clarify design intent, complement the code and the talking.
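A minimal sketch of what "speaking the model" can look like in code - the names Itinerary, Routing Service and Route Specification come from the example above; the attributes and method are invented for illustration:

```python
from dataclasses import dataclass
from typing import List


@dataclass(frozen=True)
class RouteSpecification:
    # Stated in the domain language, not as "cargo id, origin, destination" columns.
    origin: str
    destination: str


@dataclass
class Itinerary:
    legs: List[str]


class RoutingService:
    def route_for(self, specification: RouteSpecification) -> Itinerary:
        # A real implementation would consult schedules; the point is that
        # code and conversation share one vocabulary.
        return Itinerary(legs=[specification.origin, specification.destination])
```

A domain expert can follow a sentence like "the routing service finds an itinerary satisfying the route specification" directly in this code.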
## Chapter 3: Binding Model and Implementation Tightly relating the code to an underlying model gives the code meaning and makes the model relevant. The design must map to the domain model; if it doesn't, the correctness of the software is suspect. Model-Driven Design - discards the dichotomy of analysis model and design to search out a single model that serves both purposes (the ubiquitous language). Each object in the design plays a conceptual role described in the model. The code needs to be revised to reflect the model in a very literal way, so the mapping is obvious. The code becomes an expression of the model. Model-Driven Design is hard to accomplish in procedural languages like C or Fortran. This approach is reserved for object-oriented programming languages. The implementation model should not be exposed to the user. People responsible for the implementation should participate in modeling. Strict separation of responsibilities is harmful. Modeling and implementation are coupled in model-driven design. Any technical person contributing to the model must spend some time touching the code. Every developer must be involved in some level of discussion about the model. ## Chapter 4: Isolating the Domain Layered Architecture - the essential principle is that any element of a layer depends only on other elements in the same layer or on elements of the layers beneath it. Each layer specialises in a particular aspect of a computer program. Most commonly used layers: - UI (Presentation) Layer - showing information to the user and interpreting the user's commands. - Application Layer - this layer does not contain business logic, but only coordinates tasks and delegates work to collaborations of domain objects in the next layer down. - Domain (Model) Layer - responsible for representing concepts of business, information about the business situation and business rules. This layer is the heart of business software. - Infrastructure Layer - generic technical capabilities that support the higher layers (message sending, drawing widgets on the UI, ...), may also support the pattern of interactions between the 4 layers through an architectural framework. Partition a complex program into layers, develop a design within each layer that is cohesive and that depends only on the layers below. Concentrate all the code related to the domain model in one layer and isolate it from the user interface, application and infrastructure code. The domain objects, free of the responsibility of displaying themselves, storing themselves, managing application tasks and so forth, can be focused on expressing the domain model. This allows the model to evolve to be rich enough and clear enough to capture essential business knowledge and put it to work. Such separation allows a much cleaner design for each layer, especially because the layers tend to evolve at a different pace. Upper layers can use or manipulate elements of lower ones straightforwardly by calling their public interfaces. Domain-driven design requires only one particular layer (the domain layer) to exist. ## Chapter 5: A Model Expressed in Software ASSOCIATIONS. For every traversable association in the model, there is a mechanism in the software with the same properties. Constraints on associations should be included in the model and implementation (e.g. president of ... for a period of time), they make the model more precise and the implementation easier to maintain. ENTITIES.
Object modeling tends to lead us to focus on the attributes of an object, but the fundamental concept of an entity is an abstract continuity threading through a life cycle and even passing through multiple forms. Sometimes such an object must be matched with another object even though their attributes differ. Transactions in a banking application: two deposits of the same amount to the same account on the same day are still distinct transactions. They have identity and are entities. > When an object is distinguished by its identity, rather than its attributes, make this primary to its definition in > the model. Keep the class definition simple and focused on life cycle continuity and identity. Define a means of > distinguishing each object regardless of its form or history. Identity - this may simply mean a unique identifier. Each entity must have an operational way of establishing its identity with another object - distinguishable even from another object with the same descriptive attributes. Defining identity demands understanding of the domain. VALUE OBJECTS. An object that represents a descriptive aspect of the domain with no conceptual identity. These are objects that describe things. When you care only about the attributes of an element of the model, classify it as a value object.
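A small illustration of the distinction (the Customer and Address classes are invented for this note, not taken from the book):

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class Address:
    # Value object: defined entirely by its attributes, immutable -
    # two equal addresses are interchangeable.
    street: str
    city: str


class Customer:
    # Entity: identity threads through the life cycle, attributes may change.
    def __init__(self, customer_id: str, address: Address) -> None:
        self.customer_id = customer_id
        self.address = address

    def __eq__(self, other: object) -> bool:
        # Same customer only if identities match, even when every
        # descriptive attribute differs.
        return isinstance(other, Customer) and self.customer_id == other.customer_id

    def __hash__(self) -> int:
        return hash(self.customer_id)
```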
SERVICES. Some concepts from the domain aren't natural to model as objects. Forcing the required domain functionality to be the responsibility of an entity or value either distorts the definition of a model-based object or adds meaningless artificial objects. A service is an operation offered as an interface that stands alone in the model, without encapsulating state. The name *service* emphasises the relationship with other objects. Services have to be stateless. MODULES. Many don't consider modules as part of the model. Yet it isn't just code being divided into modules, but concepts. Low coupling between modules minimises the cost of understanding their place in the design. It is possible to analyse the contents of one module with a minimum of reference to others that interact. Choose modules that tell the story of the system and contain a cohesive set of concepts. Give the modules names that become part of the ubiquitous language. Modules and their names should reflect insight into the domain. Modules need to co-evolve with the rest of the model. This means refactoring modules right along with the model and code. But this refactoring often doesn't happen. Use packaging to separate the domain layer from other code. Otherwise, leave as much freedom as possible to the domain developers to package the domain objects in ways that support their model and design choices. ## Chapter 6: The Life Cycle of a Domain Object The challenges: - Maintaining object integrity throughout the life cycle - Preventing the model from getting swamped by the complexity of managing the life cycle These issues can be addressed using 3 patterns. AGGREGATES. It is difficult to guarantee the consistency of changes to objects in a model with complex associations. Invariants need to be maintained that apply to closely related groups of objects, not just discrete objects. Yet cautious locking schemes cause multiple users to interfere pointlessly with each other and make a system unusable. An aggregate is a cluster of associated objects that we treat as a unit for the purpose of data changes. Each aggregate has a root and a boundary. Choose one entity to be the root of each aggregate, and control all access to the objects inside the boundary through the root. Allow external objects to hold references to the root only. FACTORIES. When creation of an object, or an entire aggregate, becomes complicated or reveals too much of the internal structure, factories provide encapsulation (assembly of a car: cars are never assembled and driven at the same time, there is no value in combining both of these functions into the same mechanism). Creation of an object can be a major operation by itself, but complex assembly operations do not fit the responsibility of the created objects. Combining such responsibilities can produce ungainly designs that are hard to understand. Making the client direct construction muddies the design of the client, breaches encapsulation of the assembled object or aggregate, and overly couples the client to the implementation of the created object. Two basic requirements for any good factory: 1. Each creation method is atomic and enforces all invariants of the created object or aggregate. 2. The factory should be abstracted to the type desired, rather than the concrete class created. REPOSITORIES. Associations allow us to find an object based on its relationship to another. But we must have a starting point for a traversal to an entity or value in the middle of its life cycle. For each type of object that needs global access, create an object that can provide the illusion of an in-memory collection of all objects of that type. Set up access through a well-known global interface. Provide methods to add and remove objects, which will encapsulate the actual insertion or removal of data in the data store. Provide methods that select objects based on some criteria and return objects. Provide repositories only for aggregate roots that actually need direct access. Keep the client focused on the model, delegating all object storage and access to the repositories. Repositories provide methods that allow a client to request objects matching some criteria.
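A minimal in-memory sketch of the pattern (Order and OrderRepository are illustrative names, not from the book):

```python
from typing import Dict, List, Optional


class Order:
    def __init__(self, order_id: str, customer_id: str) -> None:
        self.order_id = order_id
        self.customer_id = customer_id


class OrderRepository:
    """Provides the illusion of an in-memory collection of all Orders,
    hiding the actual data store behind a well-known interface."""

    def __init__(self) -> None:
        self._orders: Dict[str, Order] = {}

    def add(self, order: Order) -> None:
        self._orders[order.order_id] = order

    def remove(self, order_id: str) -> None:
        self._orders.pop(order_id, None)

    def get(self, order_id: str) -> Optional[Order]:
        return self._orders.get(order_id)

    def for_customer(self, customer_id: str) -> List[Order]:
        # Select objects based on some criteria and return them.
        return [order for order in self._orders.values() if order.customer_id == customer_id]
```

Swapping the dictionary for a database table changes nothing for the client - that is the point of the pattern.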
## Chapter 7: Using the Language: An Extended Example The model organises domain knowledge and provides a language for the team. Each object in the model has a clear meaning. To prevent domain responsibilities from being mixed with those of other parts of the system, apply a layered architecture. Modeling and design is not a constant forward process. It will grind to a halt unless there is frequent refactoring to take advantage of new insights to improve the model and the design. The real challenge is to actually find an incisive model, one that captures subtle concerns of the domain experts and can drive a practical design. Ultimately, we hope to develop a model that captures a deep understanding of the domain. Refactoring is the redesign of software in ways that do not change its functionality. Rather than making elaborate up-front design decisions, developers take code through a continuous series of small, discrete design changes, each leaving existing functionality unchanged while making the design more flexible or easier to understand. Initial models usually are naive and superficial, based on shallow knowledge. Versatility, simplicity and explanatory power come from a model that is truly in tune with the domain. You will usually depend on creativity and trial and error to find good ways to model the concepts you discover. ## Chapter 8: Breakthrough The returns from refactoring are not linear. Usually there is marginal return for a small effort, and the small improvements add up. Slowly but surely, the team assimilates knowledge and crunches it into a model. Each refinement of code and model gives developers a clearer view. This clarity creates the potential for a breakthrough. Don't become paralysed trying to bring about a breakthrough. The possibility usually comes after many modest refactorings. Most of the time is spent making piecemeal improvements, with model insights emerging gradually during each successive refinement. Don't hold back from modest improvements, which gradually deepen the model, even if confined within the same general conceptual framework. ## Chapter 9: Making Implicit Concepts Explicit A deep model has power because it contains the central concepts and abstractions that can succinctly and flexibly express essential knowledge of the user's activities, their problems and their solutions. The first step is to somehow represent the essential concepts of the domain in the model. Refinement comes later, after successive iterations of knowledge crunching and refactoring. But this process really gets into gear when an important concept is recognised and made explicit in the model and design. Sometimes the transformation of a formerly implicit concept into an explicit one is a breakthrough that leads to a deep model. More often, though, the breakthrough comes later, after a number of important concepts are explicit in the model. Listen to the language the domain experts use. Are there terms that succinctly state something complicated? Are they correcting your word choice? Do the puzzled looks on their faces go away when you use a particular phrase? These are hints of a concept that might benefit the model. Constraints make up a particularly important category of model concepts. They often emerge implicitly, and expressing them explicitly can greatly improve a design. Sometimes constraints find a natural home in an object or a separate method. Specification - a predicate that determines whether an object satisfies some criteria.
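Specifications translate almost directly into code - a tiny sketch (the OverdueSpecification name and Invoice fields are invented for illustration):

```python
from dataclasses import dataclass
from datetime import date


@dataclass
class Invoice:
    due_date: date
    paid: bool


class OverdueSpecification:
    """A predicate as an explicit model concept: answers whether a
    candidate object satisfies the criteria."""

    def __init__(self, today: date) -> None:
        self.today = today

    def is_satisfied_by(self, invoice: Invoice) -> bool:
        return not invoice.paid and invoice.due_date < self.today
```

The rule now has a name from the ubiquitous language and can be tested and combined, instead of being buried in an if statement.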
## Chapter 10: Supple Design The ultimate purpose of software is to serve users. But first, that same software has to serve developers. This is especially true in a process that emphasises refactoring. When software with complex behaviour lacks a good design, it becomes hard to refactor or combine elements. Duplication starts to appear as soon as a developer isn't confident of predicting the full implications of a computation. Duplication is forced when design elements are monolithic, so that the parts cannot be recombined. Supple design is the complement to deep modelling. Once you have dug out implicit concepts and made them explicit, you have the raw material. Through the iterative cycle, you hammer that material into a useful shape. If a developer must consider the implementation of a component in order to use it, the value of encapsulation is lost. Names should conform to the ubiquitous language so that team members can quickly infer their meaning. Write a test of a behaviour before creating it, to force your thinking into client developer mode. Place as much of the logic of the program as possible into functions, operations that return results with no observable side effects. Decompose design elements (operations, interfaces, classes and aggregates) into cohesive units, taking into consideration your intuition of the important divisions in the domain. Align the model with the consistent aspects of the domain that make it a viable area of knowledge in the first place. Low coupling is fundamental to object design. When you can, go all the way: eliminate all other concepts from the picture. Then the class will be completely self-contained and can be studied and understood alone. Every such self-contained class significantly eases the burden of understanding a module. Where it fits, define an operation whose return type is the same as the type of its arguments. ## Chapter 11: Applying Analysis Patterns > Analysis patterns are groups of concepts that represent a common construction in business modelling. It may be > relevant to only one domain, or it may span many domains. An analysis pattern is a template for solving an organizational, social or economic problem in a professional domain. ## Chapter 12: Relating Design Patterns to the Model Not all design patterns can be used as domain patterns. STRATEGY - Domain models contain processes that are not technically motivated but actually meaningful in the problem domain. When alternative processes must be provided, the complexity of choosing the appropriate process combines with the complexity of the multiple processes themselves, and things get out of hand. Factor the varying parts of the process into a separate "strategy" object in the model. Factor apart a rule and the behaviour it governs. Implement the rule or substitutable process following the strategy design pattern. Multiple versions of the strategy object represent different ways the process can be done.
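A sketch of a strategy that is meaningful in the domain, not just technically (the routing policies and the tuple shape are invented for illustration):

```python
from typing import List, Protocol, Tuple

Leg = Tuple[int, int]  # (cost, duration) - a simplification for the sketch


class LegMagnitudePolicy(Protocol):
    # The "strategy": a substitutable domain rule, factored apart from
    # the routing process it governs.
    def magnitude(self, leg: Leg) -> int: ...


class CheapestRoutePolicy:
    def magnitude(self, leg: Leg) -> int:
        cost, _ = leg
        return cost


class FastestRoutePolicy:
    def magnitude(self, leg: Leg) -> int:
        _, duration = leg
        return duration


def best_leg(legs: List[Leg], policy: LegMagnitudePolicy) -> Leg:
    # The process stays fixed; the rule that ranks the legs is swappable.
    return min(legs, key=policy.magnitude)
```

Here `best_leg([(5, 9), (7, 3)], FastestRoutePolicy())` picks `(7, 3)` - choosing a different policy changes the meaning of "best" without touching the process.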
COMPOSITE - When the relatedness of nested containers is not reflected in the model, common behaviour has to be duplicated at each level of the hierarchy, and nesting is rigid. Clients must deal with different levels of the hierarchy through different interfaces, even though there may be no conceptual difference they care about. Recursion through the hierarchy to produce aggregated information is very complicated. Define an abstract type that encompasses all members of the composite. Methods that return information are implemented on containers to return aggregated information about their contents. Leaf nodes implement those methods based on their own values. Clients deal with the abstract type and have no need to distinguish leaves from containers. ## Chapter 13: Refactoring Toward Deeper Insight A multifaceted process. There are 3 things you have to focus on: 1. Live in the domain 2. Keep looking at things a different way 3. Maintain an unbroken dialog with domain experts Seeking insight into the domain creates a broader context for the process of refactoring. Refactoring toward deeper insight is a continuing process. Implicit concepts are recognised and made explicit. Development suddenly comes to the brink of a breakthrough and plunges through to a deep model. ## Chapter 14: Managing Model Integrity Total unification of the domain model for a large system will not be feasible or cost-effective. BOUNDED CONTEXT. Multiple models are in play on any large project. Yet when code based on distinct models is combined, software becomes buggy, unreliable and difficult to understand. Communication among team members becomes confused. It is often unclear in what context a model should not be applied. Therefore, explicitly define the context within which a model applies. Explicitly set boundaries in terms of team organisation, usage within specific parts of the application, and physical manifestations such as code bases and database schemas. Keep the model strictly consistent within these bounds, but don't be distracted or confused by issues outside. CONTINUOUS INTEGRATION. When a number of people are working in the same bounded context, there is a strong tendency for the model to fragment. The bigger the team, the bigger the problem, but as few as three or four people can encounter serious problems. Yet breaking down the system into ever-smaller contexts eventually loses a valuable level of integration and coherency. Therefore, institute a process of merging all code and other implementation artefacts frequently, with automated tests to flag fragmentation quickly. Relentlessly exercise the ubiquitous language to hammer out a shared view of the model as the concepts evolve in different people's heads. CONTEXT MAP. People on other teams will not be very aware of the context bounds and will unknowingly make changes that blur the edges or complicate the interconnections. When connections must be made between different contexts, they tend to bleed into each other. Therefore, identify each model in play on the project and define its bounded context. This includes the implicit models of non-object-oriented subsystems. Name each bounded context, and make the names part of the ubiquitous language. Describe the points of contact between the models, outlining explicit translation for any communication and highlighting any sharing. SHARED KERNEL. Uncoordinated teams working on closely related applications can go racing forward for a while, but what they produce may not fit together. They can end up spending more on translation layers and retrofitting than they would have on continuous integration in the first place, meanwhile duplicating effort and losing the benefits of a common ubiquitous language. Therefore, designate some subset of the domain that the two teams agree to share. Of course this includes, along with this subset of the model, the subset of code or of the database design associated with that part of the model. This explicitly shared stuff has special status, and shouldn't be changed without consultation with the other team. Integrate a functional system frequently, but somewhat less often than the pace of continuous integration within the teams. At these integrations, run the tests of both teams. CUSTOMER / SUPPLIER DEVELOPMENT TEAMS. The freewheeling development of the upstream team can be cramped if the downstream team has veto power over changes, or if procedures for requesting changes are too cumbersome. The upstream team may even be inhibited, worried about breaking the downstream system. Meanwhile, the downstream team can be helpless, at the mercy of upstream priorities. Therefore, establish a clear customer / supplier relationship between the two teams. In planning sessions, make the downstream team play the customer role to the upstream team. Negotiate the budget and tasks for downstream requirements so that everyone understands the commitment and schedule. CONFORMIST. When two development teams have an upstream / downstream relationship in which the upstream has no motivation to provide for the downstream team's needs, the downstream team is helpless. Therefore, eliminate the complexity of translation between bounded contexts by slavishly adhering to the model of the upstream team. ANTI-CORRUPTION LAYER.
When a new system is being built that must have a large interface with another, the difficulty of relating the two models can eventually overwhelm the intent of the new model altogether, causing it to be modified to resemble the other system's model, in an ad hoc fashion. Therefore, create an isolating layer to provide clients with functionality in terms of their own domain model. The layer talks to the other system through its existing interface, requiring little or no modification to the other system.
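A sketch of such a layer (LegacyCrm and its record format are an invented stand-in for "the other system"):

```python
class LegacyCrm:
    # The other system, with its own model - we do not modify it.
    def fetch_client_record(self, number: str) -> dict:
        return {"CLIENT_NO": number, "NM": "ACME Ltd", "ST": "A"}


class Customer:
    # Our own domain model's view of a customer.
    def __init__(self, customer_id: str, name: str, active: bool) -> None:
        self.customer_id = customer_id
        self.name = name
        self.active = active


class CustomerTranslator:
    """The anti-corruption layer: talks to the legacy interface and hands
    clients objects expressed in their own domain model."""

    def __init__(self, crm: LegacyCrm) -> None:
        self.crm = crm

    def customer(self, customer_id: str) -> Customer:
        record = self.crm.fetch_client_record(customer_id)
        return Customer(
            customer_id=record["CLIENT_NO"],
            name=record["NM"],
            active=record["ST"] == "A",
        )
```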
SEPARATE WAYS. Integration is always expensive. Sometimes the benefit is small. Therefore, declare a bounded context to have no connection to the others at all, allowing developers to find simple, specialised solutions within this small scope. OPEN HOST SERVICE. When a subsystem has to be integrated with many others, there is more and more to maintain and more and more to worry about when changes are made. Therefore, define a protocol that gives access to your subsystem as a set of services. PUBLISHED LANGUAGE. Direct translation to and from the existing domain models may not be a good solution. Those models may be overly complex or poorly factored. Therefore, use a well-documented shared language that can express the necessary domain information as a common medium of communication, translating as necessary into and out of that language. ## Chapter 15: Distillation CORE DOMAIN. In designing a large system, there are so many contributing components, all complicated and all absolutely necessary to success, that the essence of the domain model can be obscured and neglected. Therefore, boil the model down. Make the core small. GENERIC SUBDOMAINS. Anything extraneous makes the core domain harder to discern and understand. Therefore, identify cohesive subdomains that are not the motivation for your project. Factor out generic models of these subdomains and place them in separate modules. DOMAIN VISION STATEMENT. In later stages of development, there is a need for explaining the value of the system that does not require an in-depth study of the model. Therefore, write a short description of the core domain. Keep it narrow. Write this statement early and revise it as you gain new insight. HIGHLIGHTED CORE. The mental labor of constantly filtering the model to identify the key parts absorbs concentration better spent on design thinking, and it requires comprehensive knowledge of the model. Therefore, write a brief document that describes the core domain and the primary interactions among core elements. COHESIVE MECHANISMS. Computations sometimes reach a level of complexity that begins to bloat the design. The conceptual *what* is swamped by the mechanistic *how*. Therefore, partition a conceptually cohesive mechanism into a separate lightweight framework. SEGREGATED CORE. Elements in the model may partially serve the core domain and partially play a supporting role. Core elements may be tightly coupled to generic ones. Therefore, refactor the model to separate the core concepts from supporting players and strengthen the cohesion of the core while reducing its coupling to other code. ABSTRACT CORE. When there is a lot of interaction between subdomains in separate modules, either many references will have to be created between modules, which defeats much of the value of the partitioning, or the interaction will have to be made indirect, which makes the model obscure. Therefore, identify the most fundamental concepts in the model and factor them into distinct classes, abstract classes or interfaces. ## Chapter 16: Large-Scale Structure EVOLVING ORDER. Design free-for-alls produce systems no one can make sense of as a whole. Therefore, let this conceptual large-scale structure evolve with the application, possibly changing to a completely different type of structure along the way. Don't over-constrain the detailed design and model decisions that must be made with detailed knowledge. SYSTEM METAPHOR. Software designs tend to be very abstract and hard to grasp. Developers and users alike need tangible ways to understand the system and share a view of the system as a whole. Therefore, organise the design around a metaphor and absorb it into the ubiquitous language. RESPONSIBILITY LAYERS. When each individual object has handcrafted responsibilities, there are no guidelines, no uniformity and no ability to handle large swaths of the domain together. Therefore, look at the conceptual dependencies in your model and the varying rates and sources of change of different parts of your domain. Refactor the model so that the responsibilities of each domain object fit neatly within the responsibility of one layer. KNOWLEDGE LEVEL. In applications in which the roles and relationships between entities vary in different situations, complexity can explode. Objects end up with references to other types to cover a variety of cases, or with attributes that are used in different ways in different situations. Therefore, create a distinct set of objects that can be used to describe and constrain the structure and behaviour of the basic model. PLUGGABLE COMPONENT BEHAVIOUR. When a variety of applications have to interoperate, all based on the same abstractions but designed independently, translations between multiple bounded contexts limit integration. Duplication and fragmentation raise the costs of development and installation. Therefore, distill an abstract core of interfaces and interactions and create a framework that allows diverse implementations of those interfaces to be freely substituted. ================================================ FILE: books/ddia.md ================================================ [go back](https://github.com/pkardas/learning) # Designing Data-Intensive Applications: The Big Ideas Behind Reliable, Scalable, and Maintainable Systems Book by Martin Kleppmann - [Chapter 1: Reliable, Scalable and Maintainable Applications](#chapter-1-reliable-scalable-and-maintainable-applications) - [Chapter 2: Data Models and Query Languages](#chapter-2-data-models-and-query-languages) - [Chapter 3: Storage and Retrieval](#chapter-3-storage-and-retrieval) - [Chapter 4: Encoding and Evolution](#chapter-4-encoding-and-evolution) - [Chapter 5: Replication](#chapter-5-replication) - [Chapter 6: Partitioning](#chapter-6-partitioning) - [Chapter 7: Transactions](#chapter-7-transactions) - [Chapter 8: The Trouble with Distributed Systems](#chapter-8-the-trouble-with-distributed-systems) - [Chapter 9: Consistency and Consensus](#chapter-9-consistency-and-consensus) - [Chapter 10: Batch Processing](#chapter-10-batch-processing) - [Chapter 11: Stream Processing](#chapter-11-stream-processing) - [Chapter 12: The Future of Data Systems](#chapter-12-the-future-of-data-systems) ## Chapter 1: Reliable, Scalable and Maintainable Applications Many applications today are data-intensive: raw CPU power is rarely the limiting factor - the amount of data, its complexity and the speed at which it changes are. They are built from standard building blocks: database, cache, search index, stream processing, batch processing. These building blocks have many variants.
*Reliability* - performs as expected, tolerates users' mistakes, good performance, continues to work even if things go wrong. Hardware faults - on a cluster with 10 000 disks, you can expect, on average, one disk to die per day. Multi-machine redundancy is required only by a small number of applications. Software errors - e.g. many applications hung simultaneously on 30.06.2012 because of a bug in the Linux kernel (triggered by the leap second). These kinds of bugs lie dormant for a long time until they are triggered by an unusual set of circumstances. Human errors - humans are responsible for the majority of errors. There are measures that can be taken in order to prevent errors: - well-defined abstractions, easy to use tools, interfaces that discourage doing the wrong things - provide a fully functional non-production sandbox environment where people can explore and experiment with real data - test thoroughly at all levels (unit tests, integration, ...) - provide tools that can recompute the data in case of errors in the past - set up detailed monitoring *Scalability* - a system's ability to cope with increased load. Load can be described with a few numbers (load parameters), e.g. requests per second, read/write ratio, number of simultaneous connections, hit rate on a cache or something else. *Describing performance* Response times (client waiting time) vary, so don't rely on the average - look at the median (p50). In order to know how bad your outliers are, you need to look at the 95th, 99th and 99.9th percentiles. High percentiles (tail latencies) are important because they directly affect users' experience. Anyhow, optimising the 99.99th percentile might be really expensive. SLO (service level objectives) and SLA (service level agreements) - contracts that define the expected performance and availability of a service. Example SLA: service up and median response time < 200 ms, 99th percentile < 1s. High percentiles are extremely important in backend services that are called multiple times as part of serving a single end-user request.
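A quick illustration of why the median and the high percentiles tell different stories (the response times are made up):

```python
import math

# Made-up response times in milliseconds for ten requests.
response_times_ms = sorted([30, 32, 35, 41, 45, 52, 61, 80, 120, 950])


def percentile(sorted_values, p):
    # Nearest-rank percentile: the smallest value with at least p% of
    # observations at or below it.
    rank = math.ceil(p / 100 * len(sorted_values))
    return sorted_values[rank - 1]


print(percentile(response_times_ms, 50))  # 45  - the typical request
print(percentile(response_times_ms, 95))  # 950 - the tail an SLA cares about
```

The average here (~145 ms) describes almost no real request, which is why percentiles matter.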
*Evolvability*

Requirements change, you learn new facts, new use cases emerge, priorities change, etc. Agile provides a framework for adapting to change. Modify the system and adapt it to changing requirements - pay attention to simplicity and abstractions.

## Chapter 2: Data Models and Query Languages

*Relational Model vs Document Model*. Relational databases turned out to generalise very well. NoSQL (*Not Only SQL*) is the latest attempt to overthrow the relational model's dominance. Driving forces behind NoSQL:

- a need for greater scalability - very large datasets / very high write throughput
- many open source projects
- specialised query operations
- frustration with the restrictiveness of the relational model

A rule of thumb: if you are duplicating values that could be stored in just one place, the schema is not normalised.

Many-to-many relationships are widely used in relational databases; NoSQL reopened the debate on how best to represent such relationships.

If your data has a document-like structure, then it's probably a good idea to use a document model. The relational alternative - shredding (splitting a document-like structure into multiple tables) - can lead to unnecessarily complicated application code. Problems with the document model: you cannot access a nested object directly (you need to use an access path), and it does not perform well with many-to-many relationships.

Database schemas can be compared to languages: relational - a compiled language with static typing; document - dynamic (runtime) type checking - schema-on-read.

Data locality - because document databases store a document as a single continuous string (JSON, XML, ...), access will often be faster because of locality; if data is split across multiple tables -> multiple disks -> more disk seeks -> more time required. However, the document database will need to load the entire document even if you need only a small portion of it.

*Query Languages for Data*

SQL is declarative - you define what you want, and it is up to the computer to determine how to get this data. Most programming languages are imperative - you define how to process the data.

*MapReduce Querying*

MapReduce - a programming model for processing large amounts of data in bulk across many machines. A limited form of MapReduce is supported by some NoSQL datastores. It is something between declarative and imperative programming.

*Graph-Like Data Models*

A very good approach for data with many-to-many relationships. Each vertex has: an ID, a set of outgoing edges, a set of incoming edges, and a collection of properties (key-value pairs). Each edge has: an ID, the tail vertex, the head vertex, a label describing the type of relationship, and a collection of properties (key-value pairs). Graphs give great flexibility in modeling relationships, e.g. France has departments and regions, whereas the US has counties and states.

Cypher is a declarative query language for property graphs, created for the Neo4j DB, e.g. find the names of all people who emigrated from the US to Europe:

```cypher
MATCH
  (person) -[:BORN_IN]-> () -[:WITHIN*0..]-> (us:Location {name: "United States"}),
  (person) -[:LIVES_IN]-> () -[:WITHIN*0..]-> (eu:Location {name: "Europe"})
RETURN person.name
```

This can be expressed in SQL (recursive common table expressions), but with one difficulty: `LIVES_IN` might point to any location (region, country, state, continent), and here we are interested only in the US and Europe. 4 lines in Cypher vs 29 lines in SQL.
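A toy sketch of the same traversal idea over an in-memory adjacency structure - the `within` helper plays the role of the `-[:WITHIN*0..]->` pattern above (all data and names below are made up for illustration):

```python
# Toy property graph: WITHIN edges form the location hierarchy.
WITHIN = {
    "Idaho": "United States",
    "United States": "North America",
    "Bourgogne": "France",
    "France": "Europe",
}

PEOPLE = [
    {"name": "Lucy", "born_in": "Idaho", "lives_in": "Bourgogne"},
    {"name": "Alain", "born_in": "Bourgogne", "lives_in": "Bourgogne"},
]

def within(location, target):
    """Follow WITHIN edges zero or more times - the *0.. in the Cypher query."""
    while location is not None:
        if location == target:
            return True
        location = WITHIN.get(location)
    return False

# Equivalent of: MATCH ... BORN_IN ... United States, LIVES_IN ... Europe
emigrants = [
    p["name"]
    for p in PEOPLE
    if within(p["born_in"], "United States") and within(p["lives_in"], "Europe")
]
print(emigrants)  # ['Lucy']
```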
*Triple-Stores*

Very similar to the graph model - all information is stored in the form of very simple three-part statements: `(subject, predicate, object)`, e.g. `(Jim, likes, bananas)`.

## Chapter 3: Storage and Retrieval

In order to tune a storage engine to perform well on your kind of workload, you need to have a rough idea of what the storage engine is doing under the hood.

*Data Structures That Power Your Database*

Hash indexes - for example, an in-memory hash map of keys to byte offsets in an append-only data file.

SSTable - Sorted String Table.

B-trees - the most widely used indexing structure, the standard index implementation for almost all relational databases and for many non-relational databases. B-trees keep key-value pairs sorted by key, which allows efficient key-value lookups. The number of references to child pages in one page of the B-tree is called the branching factor. A B-tree with *n* keys always has a depth of *O(log n)*. Most databases can fit into a B-tree that is 3-4 levels deep. A 4-level tree of 4 KB pages with a branching factor of 500 can store up to 256 TB of data.

In order to make a DB resilient to crashes, it is common for B-tree implementations to include an additional data structure on disk - a WAL (write-ahead log) - an append-only file to which every B-tree modification must be written before it can be applied to the pages of the tree. When the DB crashes, this log is used to restore the B-tree to a consistent state.

LSM-tree:

- faster for writes
- can be compressed better, thus often produces smaller files on disk - lower storage overheads
- the compaction process can sometimes interfere with the performance of ongoing reads and writes
- if write throughput is high and compaction is not configured carefully, compaction might not keep up with the rate of incoming writes

B-trees are so old and so well optimised that they can deliver good, consistent performance for many workloads.

Key-value indexes are like the primary key index in the relational model. It is also common to have secondary indexes; they don't have to be unique - this can be solved, for example, by appending the row ID.

Clustered index - storing all row data within the index. Concatenated index - a multi-column index; it combines several fields into one key by appending one column to another.

What if you search for misspelled or similar data? Lucene is able to search text for words within a certain edit distance.

The data structures discussed so far are specific to disks. However, as RAM becomes cheaper, many datasets become feasible to keep entirely in memory. This led to the development of in-memory databases. Some in-memory key-value stores (Memcached) are intended for caching - data can be lost on machine restart. Other in-memory databases aim for durability, which can be achieved with special battery-powered RAM, by writing a log of changes to disk, or by replicating memory state to other machines. When restarted, such a database needs to load the data from disk or from a replica - so even though it is an in-memory database, a disk is still used. Other in-memory databases with a relational model: VoltDB, MemSQL, Oracle TimesTen. RAMCloud is a key-value store; Redis and Couchbase provide weak durability by writing to disk asynchronously. In-memory databases achieve better performance.

OLTP - Online Transaction Processing - interactive applications - look up a small number of records, insert or update records based on the user's activity.

OLAP - Online Analytic Processing - a different access pattern - analytic queries.
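Returning to the hash-index idea from the top of this chapter - a minimal sketch of an append-only data file plus an in-memory hash map from key to byte offset. This is a toy (keys and values must not contain commas or newlines), not a real storage engine:

```python
import os

class HashIndexStore:
    """Toy append-only key-value store with an in-memory hash index.

    set() appends "key,value" to the data file and remembers the byte
    offset of that record; get() seeks straight to the latest offset.
    Old records stay in the file - a real engine would compact them.
    """

    def __init__(self, path):
        self.path = path
        self.index = {}  # key -> byte offset of the latest record for that key
        open(path, "a").close()  # make sure the data file exists

    def set(self, key, value):
        with open(self.path, "a") as f:
            self.index[key] = f.tell()  # append position = current file size
            f.write(f"{key},{value}\n")

    def get(self, key):
        with open(self.path) as f:
            f.seek(self.index[key])
            _, value = f.readline().rstrip("\n").split(",", 1)
            return value

store = HashIndexStore("toy.db")
store.set("42", "San Francisco")
store.set("42", "SF")   # the old record is not overwritten in place
print(store.get("42"))  # SF - the index points at the newest record
os.remove("toy.db")
```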
In the 90s, companies stopped using OLTP systems for analytics purposes and shifted to running analytics on a separate database. This separate database is called a data warehouse.

Data warehouse - a separate database that analysts can query without affecting OLTP operations; a read-only copy of the data. Data is extracted from OLTP databases and transformed into an analysis-friendly schema. The process of getting data into the warehouse is known as Extract-Transform-Load (ETL). The biggest advantage of a separate database for analysis is that it can be optimised for large queries.

Many data warehouses use a star schema (dimensional modeling). A variation of this schema is called the snowflake schema - snowflakes are more normalised than stars. Fact tables are often 100 columns wide, however `SELECT *` queries are rarely used.

In most OLTP databases, storage is laid out in a row-oriented fashion. How to execute analytic queries more efficiently? The idea behind column-oriented storage is simple: don't store all the values from one row together, but store all the values from each column together, e.g. one file = one column - much faster than parsing each row. Columns can be compressed using, for example, bitmap encoding - unique values encoded using bits. This is efficient in situations where there are only a few unique values and millions of records. Column compression allows more rows from a column to fit in the L1 cache.

## Chapter 4: Encoding and Evolution

Rolling upgrade / staged rollout - deploying the new version to a few nodes at a time, checking whether the new version is running smoothly. With client-side applications you are at the mercy of the user, who may not install the update for some time. This means that old and new versions of the code might co-exist for some time.

Backward compatibility - newer code can read data that was written by older code (normally not hard). Forward compatibility - older code can read data that was written by newer code (this is trickier).

Programs usually work with data in 2 representations:

- in memory - objects, lists, arrays, trees - data structures optimised for efficient access and manipulation by the CPU
- byte sequences - for example JSON

The translation from the in-memory representation to a byte sequence is called encoding. The reverse is called decoding (also: parsing, deserialisation, unmarshalling).

Many programming languages have built-in support for encoding in-memory data structures. Python has pickle, Java has Serializable, Ruby has Marshal, however:

- the encoding is tied to the programming language
- it is a potential source of security issues
- Java's built-in serialisation is said to have bad performance

In general, it is a bad idea to use a language's built-in encoding for anything other than very transient purposes.

JSON:

- built-in support in browsers
- distinguishes strings and numbers
- good support for Unicode, no support for binary strings

XML:

- too verbose
- no distinction between numbers and strings
- good support for Unicode, no support for binary strings

CSV:

- less powerful than XML and JSON
- no distinction between numbers and strings
- no data schema

Despite the flaws of JSON, XML and CSV, they are good enough for many purposes, and they will remain popular. JSON is less verbose than XML, but still uses a lot of space - this might be an issue when you are dealing with terabytes of data. This led to the development of binary encodings for JSON - BSON, BJSON, UBJSON, BISON. XML also has its binary encodings - WBXML and Fast Infoset. However, none of them are widely adopted.
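To make the space argument concrete, here is a tiny stdlib-only comparison of a JSON record against a naive, made-up fixed-layout binary encoding (length-prefixed strings plus a 4-byte integer) - not any of the formats above, purely an illustration of where JSON's overhead (field names, quotes, braces) goes:

```python
import json
import struct

record = {"userName": "Martin", "favoriteNumber": 1337, "interests": ["daydreaming", "hacking"]}

as_json = json.dumps(record).encode()

def pack_str(s):
    """Length-prefixed UTF-8 string: 1 byte of length, then the bytes."""
    raw = s.encode()
    return struct.pack("B", len(raw)) + raw

# Hand-rolled layout; the schema (field order and types) lives in the code,
# so no field names travel over the wire.
as_binary = (
    pack_str(record["userName"])
    + struct.pack(">i", record["favoriteNumber"])
    + b"".join(pack_str(i) for i in record["interests"])
)

print(len(as_json), len(as_binary))  # prints something like: 88 31
```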
Apache Thrift (Facebook) and Protocol Buffers (Google) are binary encoding libraries based on the same principle: a schema is defined in an interface definition language, and this schema can generate code for encoding and decoding data.

Field numbers in Apache Thrift are used for more compact encoding (there is no need to pass field names over the wire - CompactProtocol). Required / optional makes no difference for the encoding; this is used at runtime. Every field you add after the initial deployment of the schema must be optional or have a default value. Removing a field is like adding one, with the concerns reversed: you can remove only optional fields. Also, with Protocol Buffers / Thrift you can never use the same tag number again.

Avro is another binary encoding format; it has optional code generation for dynamically typed programming languages.

Data can flow through:

- databases
- services - REST and RPC - services are similar to databases: they allow clients to submit and query data. A key design goal of a service-oriented / microservices architecture is to make the application easier to change and maintain by making services independently deployable and evolvable. REST is not a protocol, but rather a design philosophy that builds upon the principles of HTTP. SOAP is an XML-based protocol for making network API requests. RPC - Remote Procedure Call - seems convenient at first, but the approach is fundamentally flawed, because a network request is very different from a local function call:
  - a local function is predictable - it either succeeds or fails depending on the input; a network request is unpredictable - the connection might be lost
  - a local function call either returns a result or throws an exception; a network request may return without a result - a timeout
  - a retry mechanism might cause duplication (if the first request went through), unless you build a deduplication mechanism
  - the duration of a remote call depends on network congestion
  - when you call a local function, you can efficiently pass references
  - if the client and server use different languages, data translation might end up ugly

  Despite these problems, RPC is not going away; modern frameworks are more explicit about the fact that a remote call is different from a local function invocation.
- message passing - something between RPC and databases. Similar to RPC because the client's request is delivered with low latency; similar to databases because the message is not sent via a direct network connection but goes through a message broker. Message brokers have a couple of advantages compared to RPC:
  - they can act as a buffer when the recipient is unavailable
  - they can automatically redeliver messages
  - the sender does not need to know the recipient's IP address and port
  - one message can be sent to multiple recipients
  - logical decoupling between sender and receiver

## Chapter 5: Replication

Shared-nothing architecture - each machine or virtual machine running the database is called a node. Each node uses its own CPU, RAM and disks independently. Any coordination between nodes is done at the software level, using the network.

Replication - keeping a copy of the same data on multiple machines that are connected via a network. Why?

- to reduce latency - a copy close to the users
- to allow the system to continue working even if some parts fail
- to scale out

If the data is not changing, replication is easy. For dealing with changes to replicated data, the following algorithms can be used: single-leader, multi-leader and leaderless replication.

Leaders and Followers - each node (replica) stores a copy of the database.
One of the replicas is designated to be the leader (master): when clients want to write to the database, they must send their requests to the leader. The other replicas - followers (slaves) - take the log from the leader and update their local copy of the data, applying all writes in the same order as they were processed by the leader. Writes are accepted only by the leader; reads can be performed using any follower.

On follower failure: if the connection between leader and follower is temporarily interrupted, the follower can recover easily, because it knows the last processed transaction from the log. It can request all the missing data since that last successful transaction.

Leader failure is trickier. One of the followers can be promoted to be the new leader - for example, the replica with the most recent data (election) - to minimise data loss.

Implementation of Replication Logs:

- Statement-based replication - the leader logs every write request (statement); for a relational database this means that every INSERT / UPDATE / DELETE statement is forwarded to the followers, and each follower executes the received SQL statement (as if it had been received from a client). Problems: what about NOW() and RAND() (nondeterministic), what about auto-incrementing fields, what about triggers? There are some workarounds, like sending the statement together with its result, or requiring deterministic transactions.
- Write-ahead log (WAL) shipping - the log is an append-only sequence of bytes containing all writes; this log can be used to build a replica. This method is used in PostgreSQL and Oracle. The disadvantage of this approach is that the log contains low-level information - like which disk blocks were changed - so replication is closely coupled to the storage engine (or even the storage engine version!).
- Logical log replication - an alternative approach that uses a different log format for replication - decoupling. Usually a sequence of records describing writes to database tables at the granularity of a row. Easier backward compatibility - leaders and followers can run different engine versions.
- Trigger-based replication - triggers have the ability to log changes to a separate table, from which an external process can read. This allows for replicating, for example, only a subset of the data.

Problems with replication lag:

- leader-based replication is great when we need to scale reads, not necessarily writes - common on the web
- synchronous replication - a single node failure can make the entire system unavailable
- asynchronous replication - a follower might fall behind -> inconsistent data (this is a temporary situation; if you stop writing for a while, the followers will catch up and become consistent with the leader - eventual consistency)

Replication lag anomalies:

- if a user writes and then views the data, the new data might not yet have reached the replica. Read-your-writes consistency - a guarantee that if the user reloads the page, they will always see the updates they submitted themselves.
  - Solution: the owner of the profile reads their own data from the leader, other users read from a replica. There are modifications, for example: if the last update is older than 1 minute -> read from a replica.
- when reading from asynchronous followers, a user can see things moving backward in time - this happens when the user makes several reads from different replicas
  - Solution: monotonic reads - a guarantee stronger than eventual consistency; if a user makes several reads in sequence, they will not see time go backward (never older data after newer data)
- consistent prefix reads - if a sequence of writes happens in a certain order, then anyone reading those writes will see them appear in the same order
  - Solution: make sure that any writes that are causally related to each other are written to the same partition, OR use an algorithm that keeps track of causal dependencies

When working with an eventually consistent system, it is worth thinking about how the application behaves if the replication lag increases to several minutes or hours.

Multi-Leader Replication - more than one node accepting writes; each leader simultaneously acts as a follower to the other leaders. It rarely makes sense to use a multi-leader setup within a single datacenter, because the benefits rarely outweigh the added complexity. However, there are some situations in which this configuration makes sense:

- multi-datacenter operation - one leader in each datacenter. A multi-leader setup requires a conflict resolution mechanism, which can be problematic. Multi-leader replication is often considered dangerous territory that should be avoided if possible.
- clients with offline operation - for example, a calendar app has to work even if it is disconnected from the internet; if you make changes while you are offline, they need to be synced with a server and all other devices. This basically means every device has a local database that acts as a leader. For example, CouchDB is designed for this mode of operation.
- collaborative editing - multiple people editing the same document - e.g. Google Docs - a very similar case to the previous one. If you want to guarantee that there will be no editing conflicts, the application must obtain a lock on the document before a user can edit it - this collaboration model is equivalent to single-leader replication with transactions on the leader.

Handling Write Conflicts:

- make the conflict detection synchronous - wait for the write to be replicated to all replicas before telling the user that the write was successful
- avoid conflicts - all writes for a given record go through the same leader; requests from a particular user are always routed to the same datacenter, whose leader is used for reading and writing
- each replica should converge toward a consistent state
- custom conflict resolution - this might depend on the application; the code might be executed on write or on read

Automatic Conflict Resolution - Amazon is a frequently cited example of surprising effects of a conflict resolution handler - customers were seeing previously removed items reappear in their carts. Some ideas for automatic conflict resolution:

- conflict-free replicated datatypes (CRDTs) - a family of data structures that can be concurrently edited by multiple users
- mergeable persistent data structures - similar to Git
- operational transformation - the algorithm behind Google Docs - designed specifically for concurrent edits of an ordered list of items, e.g. a string

Replication topology describes the communication paths along which writes are propagated from one node to another (circular, star, all-to-all).

Leaderless replication - the client sends its writes directly to several replicas, or a coordinator node does this on behalf of the client. When one node is down, some data might be unavailable.
For this reason, when a client reads from the database, it sends its requests to multiple replicas and uses the value with the highest version number. Eventually all the data will be copied to every replica.

2 approaches for dealing with inconsistent data: the client fixes inconsistencies whenever it notices them (read repair), or a background process looks for differences in the data (anti-entropy). For example, in Dynamo-style databases it is possible to set the minimum number of replicas that must have saved the data for a write to be considered valid.

It is important to monitor replication lag, even if your application can tolerate stale reads.

Dynamo-style databases allow several clients to concurrently write to the same key - this means potential conflicts! Events may arrive in a different order at different nodes, due to network delays and partial failures (replicas might store different values). In order to become eventually consistent, the replicas should converge toward the same value. It is up to the developer to resolve conflicts:

- last write wins - discard older values
- detect happens-before relationships (btw. two operations may be considered concurrent when they overlap in time, not necessarily happening at the same instant)
- merge concurrently written values
- use version vectors - a version number per replica and per key; each replica increments its own version number

## Chapter 6: Partitioning

Partitioning - breaking up the data into partitions (each piece of data belongs to exactly one partition). The main reason for partitioning is scalability - different partitions can be placed on different nodes. Partitioning is usually combined with replication: copies of each partition are stored on multiple nodes.

The goal of partitioning is to spread the data and the query load evenly across nodes. If every node takes a fair share, then e.g. 10 nodes should be able to handle 10x as much data. If partitioning is unfair, it is called skewed. Skew makes partitioning less effective.

In order to reduce skew, data needs to be distributed evenly. One way is to assign a continuous range of keys to each partition (PARTITION 1: A-B, PARTITION 2: C-D, ...). The ranges of keys are not necessarily evenly spaced, because the data may not be - for example, the majority of entries might start with the letter A. Partition boundaries need to be carefully selected by an application developer with domain knowledge.

Partitioning by date is problematic too - all writes go to a single partition (today's), whereas the remaining partitions sit idle. You could solve this issue by partitioning first by name (for example, sensor name) and then by time - this will balance the load.

Skew can be reduced by using a hash function that evenly distributes data across partitions. The partition boundaries can be evenly spaced or chosen pseudorandomly (consistent hashing). Consistent hashing - a way of evenly distributing load across an internet-wide system of caches such as a CDN; it uses randomly chosen partition boundaries to avoid the need for central control or distributed consensus.

Using a hash of the key loses the ability to do efficient range queries (the sort order is lost).

Hashing a key can reduce hot spots, but cannot eliminate them entirely. For example, a celebrity on social media can cause a storm of activity - this may lead to many writes to the same key. It is up to the application developer to handle such hot spots - e.g. add a random prefix to the key.

Secondary indexes are slightly more problematic, because they don't identify a record uniquely.
There are 2 main approaches to partitioning a database with secondary indexes:

- document-based (local index) - each partition has its own (local) secondary indexes, which means reading requires extra care: "I am looking for a red car" needs to scatter the query to both partitions - quite expensive. However, it is widely used.
- term-based (global index) - instead of each partition having its own secondary index, we can construct a global index. A global index also needs to be partitioned - for example, for the secondary key `color:red`, cars with names a-d on partition 0, the rest on partition 1. Reads are more efficient, writes are slower.

Data changes in the database - throughput increases, the dataset grows, machines fail. Rebalancing - the process of moving data from one node to another. After rebalancing, data should be shared fairly between nodes; while rebalancing, the database should remain available for writes and reads, and only the minimal amount of data should be moved between nodes.

DO NOT USE hash mod N when assigning keys to partitions. The problem with this approach is that the number of nodes changes, and then most keys map to a different partition - this requires moving far more data than necessary when a new node is added.

A better solution is to create a fixed number of partitions (many more partitions than nodes, e.g. 10 nodes - 1000 partitions). If a new node is added to the cluster, it can steal a few partitions from the others. The only thing that changes is the partition assignment. This is the approach followed by, for example, Elasticsearch (the number of partitions is set at the beginning). Choosing the right number of partitions is difficult.

Dynamic partitioning is suitable for key-range partitioning.

Automatic rebalancing can be unpredictable, because it is an expensive operation - rerouting requests and moving a large amount of data can overload the network. For this reason, it is a good approach to have a human administrator perform the rebalancing.

How to route a request to a particular partition? How can the system know where the data is? This problem is known as service discovery. The system can keep track of the assignment in a separate registry. Another possibility is that the client connects to any node; if that node cannot serve the request, the client is forwarded to another node.

## Chapter 7: Transactions

The overhead of transactions is usually preferable to the lack of transactions and having to code around their absence.

A transaction is a way for an application to group several reads and writes together into a logical unit. Either the entire transaction succeeds (commit) or it fails (abort, rollback). If a transaction fails, the application can retry - this makes error handling much simpler. However, sometimes it might be beneficial to weaken transactional guarantees or abandon them entirely (for higher availability).

The safety guarantees provided by transactions are often described by the ACID acronym. Implementations of ACID vary between DBMSs.

- Atomicity - (atomic refers to something that cannot be broken into smaller parts) if an error happens in the middle of a transaction, its writes have to be reverted. If a transaction was aborted, the application can be sure that it didn't change anything, so it can be safely retried. Perhaps "abortability" would have been a better term than atomicity.
- Consistency - (a terribly overloaded term) in ACID: certain statements about the data must always be true (for example, correct account balances in a banking system). If a transaction starts with a database that is valid, any writes during the transaction preserve the validity.
- Isolation - most databases are accessed by several clients at the same time; if they are accessing the same database records, you can run into concurrency problems. Isolation means that concurrently executing transactions are isolated from each other - they cannot step on each other's toes. The classic database textbooks define isolation as serialisability (however, this is rarely used because it carries a performance penalty).
- Durability - the promise that once a transaction has committed successfully, any data it has written will not be forgotten, even if there is a hardware fault or the database crashes. Anyhow, perfect durability does not exist (for example, all backups destroyed at the same time).

ACID databases are based on this philosophy: if the database is in danger of violating its guarantee of atomicity, isolation or durability, it would rather abandon the transaction entirely than allow it to remain half-finished.

Isolation makes life easier by hiding concurrency issues. In reality, serialisability is not that simple - it has a performance cost - therefore it is common for systems to use weaker levels of isolation, which protect against only some concurrency issues. Common wisdom: "Use ACID databases if you are handling financial data" - however, many popular relational databases use weak isolation even though they are considered ACID.

Read committed - the most basic level of transaction isolation, makes 2 guarantees:

- no dirty reads - you will only see data that has been committed
- no dirty writes - you will only overwrite data that has been committed

Snapshot isolation - read committed does not solve all the issues (for example, non-repeatable reads - when you read data in the middle of another transaction's writes). Data being inconsistent for a few seconds is not a problem; more problematic are long-lasting data inconsistencies. Snapshot isolation is a boon for long-running, read-only queries such as backups and analytics. A transaction should see a consistent snapshot of the database, frozen at a particular point in time (so the data is not changing while it is being processed). The key principle of snapshot isolation is: readers never block writers, and writers never block readers.

FOR UPDATE tells the database to lock all rows returned by the query.

Serialisable isolation is usually regarded as the strongest isolation level. It guarantees that even though transactions may execute in parallel, the end result is the same as if they had executed one at a time, serially, without any concurrency. The database prevents all possible race conditions.

The simplest way of avoiding concurrency problems is to remove the concurrency entirely - one transaction at a time, in serial order, on a single thread.

Stored procedures gained a bad reputation for various reasons: each DB vendor has its own language for stored procedures, code running in a database is difficult to manage (hard to debug, awkward to version and deploy, trickier to test), and a badly written procedure may harm overall DB performance. Modern implementations of stored procedures have abandoned PL/SQL and use existing general-purpose programming languages instead.

Serial execution of transactions makes concurrency control much simpler, but limits the transaction throughput of the database to the speed of a single CPU core on a single machine. A simple solution is to partition the database so that each CPU core has its own partition. However, if a transaction needs to access multiple partitions, the database must coordinate across all the partitions that it touches.
Serial execution is a viable way of achieving serialisable isolation within certain constraints:

- every transaction must be small and fast
- write throughput must be low enough to be handled by a single CPU core
- cross-partition transactions are possible, but there is a hard limit to the extent to which they can be used

2PL - Two-Phase Locking - a widely used algorithm for serialisability in databases. Similar to "no dirty writes": if two transactions concurrently try to write the same object, the lock ensures that the second writer must wait until the first one has finished its transaction before it may continue. More specifically:

- if transaction A has read an object and B wants to write to it, B must wait until A commits or aborts
- if A has written an object and B wants to read it, B must wait until A commits or aborts

In 2PL, writers don't just block other writers - they also block readers, and vice versa. The big downside of 2PL is performance: worse throughput and response times compared to weak isolation (because of the overhead of acquiring and releasing locks). It is also called a "pessimistic concurrency control mechanism" - better to wait until the situation is safe before doing anything.

SSI - Serialisable Snapshot Isolation - full serialisability with only a small performance penalty compared to snapshot isolation. A very young technique - first described in 2008. Called an "optimistic concurrency control technique": instead of blocking potentially dangerous transactions, it allows them to proceed, hoping everything will turn out all right; when a transaction wants to commit, the database checks whether everything is indeed fine. It performs badly under high contention (many transactions accessing the same object) - many transactions need to be aborted. Reads from the database are made based on snapshot isolation.

## Chapter 8: The Trouble with Distributed Systems

Anything that can go wrong, will go wrong. Working with distributed systems - writing software that runs on several computers connected by a network - is fundamentally different from writing software for a single computer.

Partial failure - some parts of the system are broken in an unpredictable way, while others work fine. Partial failures are nondeterministic. Nondeterminism and partial failures are what make distributed systems hard to work with.

High-performance computing - supercomputers with thousands of CPUs, used for computationally intensive scientific tasks, such as weather forecasting or molecular dynamics. Cloud computing - often associated with multi-tenant datacenters, commodity computers connected with an IP network, on-demand resource allocation and metered billing. Traditional enterprise datacenters lie somewhere between these two extremes.

If we want to make distributed systems work, we must accept the possibility of partial failure and build fault-tolerance mechanisms into the software. We need to build reliable systems from unreliable components (like communication over the internet: the network may fail, bits might be lost, yet it somehow works - engineers managed to build something reliable on top of unreliable foundations).
What can go wrong when sending a request:

- the request may be lost
- the request might be waiting in a queue and will be delivered later
- the remote node may have failed or temporarily stopped responding
- the request might have been processed, but the response was lost on the way back, or was delayed and will be delivered later

Network problems can be surprisingly common, even in controlled environments like a datacenter operated by one company (around 12 network faults per month in a medium-sized datacenter, half of which disconnected a single machine, and half an entire rack). EC2 is notorious for having frequent transient network glitches.

Many systems need to automatically detect faulty nodes, for example: a load balancer needs to stop sending requests to a node that is dead. Unfortunately, it is hard to tell whether a node is working or not. A timeout is the only sure way of detecting a fault, and the appropriate duration of a timeout is difficult to estimate.

The telephone network uses a circuit - a fixed, guaranteed amount of bandwidth between 2 callers. TCP, on the other hand, dynamically adapts the rate of data transfer to the available network capacity. TCP is optimised for busy networks; circuits would not work for the internet's use case.

Clocks and time are important - in distributed systems we never know the delay between send and receive.

Time-of-day clocks - return the current date and time according to some calendar. Usually synchronised with NTP (Network Time Protocol). Time-of-day clocks are unsuitable for measuring elapsed time (the clock might be reset during the measurement, because it was desynchronised).

Monotonic clocks - suitable for measuring elapsed time; they are guaranteed to always move forward (a time-of-day clock may jump back in time). NTP may adjust a monotonic clock's frequency if it discovers it is running too slow or too fast.

Software must be designed on the assumption that the network will occasionally be faulty, and the software must handle such faults gracefully.

> Distributed systems are different from programs running on a single computer - there is no shared memory, only message
> passing through an unreliable network with variable delays, and the systems may suffer from partial failures, unreliable
> clocks and processing pauses.

There are system models designed for reasoning about distributed systems problems. Timing assumptions:

- synchronous model - assumes bounded network delay, bounded process pauses and bounded clock error; this means you know the delay, and it will not exceed some fixed value. This model is not realistic.
- partially synchronous model - the system behaves like a synchronous one most of the time, but sometimes exceeds the bounds for network delay, process pauses and clock drift
- asynchronous model - no timing assumptions are allowed

Node failure assumptions:

- crash-stop faults - the algorithm may assume that a node can fail in only one way - by crashing; once crashed, it never comes back
- crash-recovery faults - a node can fail at any moment, but has some nonvolatile disk storage that is preserved across crashes
- byzantine faults - nodes may do absolutely anything, including trying to trick and deceive other nodes

The partially synchronous model with crash-recovery faults is the most common combination.

Safety of a system - nothing bad happens; liveness of a system - something good eventually happens. These two properties are often used for reasoning about the correctness of a distributed algorithm.

## Chapter 9: Consistency and Consensus

Tolerating faults - keeping the service functioning correctly, even if some internal component is faulty.
The best way of building fault-tolerant systems is to find general-purpose abstractions with useful guarantees (e.g. transactions). When working with a database that provides only weak guarantees (e.g. eventual consistency), you need to be constantly aware of its limitations (e.g. when you write and immediately read, there is no guarantee that you will see the value you just wrote).

LINEARIZABILITY (atomic consistency, strong consistency, immediate consistency) - make a system appear as if there were only one copy of the data and all operations on it are atomic. Easily confused with serialisability (both mean something like "can be arranged in a sequential order"):

- Serialisability - an isolation property of transactions; it guarantees that transactions behave the same as if they had executed in some serial order.
- Linearisability - a recency guarantee on reads and writes of a register; it doesn't group operations together into transactions, so it does not prevent problems like write skew.

Use cases for linearisability:

- locking and leader election - a single-leader system needs to ensure there is indeed only one leader, not several (split brain) - it must be linearisable; all nodes must agree which node owns the lock
- constraints and uniqueness guarantees - for example, unique usernames - a hard uniqueness constraint requires linearisability
- cross-channel timing dependencies - multiple components in a system can communicate through different channels, which opens up the possibility of race conditions

The CAP theorem has been historically influential, but nowadays it has little practical value for designing systems. A better way of phrasing CAP would be "either consistent or available when partitioned".

ORDERING GUARANTEES. Causality imposes an ordering on events (what happened before what) - a question comes before the answer, a message is sent before it is received, ... These chains of causally dependent operations define the causal order in the system. If a system obeys the ordering imposed by causality, we say it is causally consistent. Linearisability implies causality. However, it is not the only way of preserving causality - a system can be causally consistent without incurring the performance penalty (causal consistency is the strongest consistency model that does not slow down due to network delays).

Sequence Number Ordering - sequence numbers or timestamps (not a time-of-day clock, but some logical clock) used to order events. If there is not a single leader, it is less clear how to generate sequence numbers for operations:

- each node can generate its own independent sequence numbers + its node ID
- attach a timestamp to each operation
- preallocate blocks of sequence numbers (1-1000 for node A, 1001-2000 for node B, ...)

The methods above allow generating unique sequence numbers efficiently, but they do not correctly capture the ordering of operations across different nodes.

Lamport timestamp - a method for generating sequence numbers that is consistent with causality. Every node keeps track of the maximum counter value it has seen so far, and includes that maximum on every request. Each node appends its node ID to the counter; if 2 counter values are the same, the higher node ID wins.
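A minimal sketch of a Lamport clock, with made-up node IDs - timestamps are (counter, node_id) pairs compared lexicographically:

```python
class LamportClock:
    """Toy Lamport clock - timestamps are (counter, node_id) pairs.

    Comparing the pairs lexicographically gives a total order that is
    consistent with causality: if A happened before B, A's stamp is smaller.
    """

    def __init__(self, node_id):
        self.node_id = node_id
        self.counter = 0

    def tick(self):
        """Local event or send: bump the counter and stamp the event."""
        self.counter += 1
        return (self.counter, self.node_id)

    def receive(self, stamp):
        """On receive: fast-forward past the maximum counter seen so far."""
        self.counter = max(self.counter, stamp[0]) + 1
        return (self.counter, self.node_id)

a, b = LamportClock(node_id=1), LamportClock(node_id=2)
t1 = a.tick()       # (1, 1)
t2 = b.receive(t1)  # (2, 2) - causally after t1, so it compares greater
t3 = a.tick()       # (2, 1) - concurrent with t2; the tie goes to the node ID
assert t1 < t2 and t3 < t2
```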
Getting several nodes to agree on something is not easy. Examples:

- leader election - lack of communication may lead to split brain (multiple nodes believing themselves to be the leader)
- atomic commit - in a system that supports transactions spanning several nodes, a transaction may fail on some nodes but succeed on others (all nodes have to agree on the outcome - abort or commit)

The Impossibility of Consensus (the FLP result) - there is no algorithm that is always able to reach consensus if there is a risk that a node may crash. This result is proven in the asynchronous model; with timeouts or clocks, consensus becomes solvable in practice.

Two-phase commit (2PC - not to be confused with two-phase locking) is an algorithm for achieving atomic transaction commit across multiple nodes (all commit or all abort). The 2 phases:

- the coordinator begins phase 1 - it sends a prepare request to each of the nodes, asking whether they are able to commit
- the coordinator tracks the responses from the participants; if all say yes, the coordinator sends out a commit request; if any participant says no, the coordinator sends an abort request to all nodes

This is very similar to a wedding ceremony in Western cultures. If the decision was to commit, there is no going back, no matter how many retries it takes. The protocol has 2 crucial points of no return. If the coordinator dies, the nodes should communicate and come to some agreement.

2PC has a bad reputation because of operational problems, low performance, and promising more than it can deliver.

## Chapter 10: Batch Processing

> A system cannot be successful if it is too strongly influenced by a single person. Once the initial design is complete
> and fairly robust, the real test begins as people with many viewpoints undertake their own experiments.

3 types of systems:

- services (online systems) - a service waits for a request or instruction from a client to arrive; when received, the service tries to handle it as quickly as possible
- batch processing systems (offline systems) - the system takes a large amount of input data, runs a job to process it and produces some output data. Batch jobs are often scheduled to run periodically. The primary performance measure is throughput.
- stream processing systems (near-real-time systems) - something between online and offline systems. A stream processor consumes inputs and produces outputs (rather than responding to requests).

Simple batch processing can be performed in UNIX via awk, grep and other command-line tools (using a chain of commands). The Unix philosophy - the idea of connecting programs with pipes. This is possible because the programs share a common interface (they operate on file descriptors), are small, and do one thing. The biggest limitation of UNIX tools is that they run only on a single machine - and that is where tools like Hadoop come in.

MapReduce is a bit like Unix tools, but distributed across potentially thousands of machines. MapReduce jobs read and write files on a distributed filesystem; in Hadoop's implementation of MapReduce, the filesystem is called HDFS (Hadoop Distributed File System - a reimplementation of the Google File System).

HDFS is based on the shared-nothing principle. HDFS consists of a daemon process running on each machine, exposing a network service that allows other nodes to access files stored on that machine. In order to tolerate machine and disk failures, file blocks are replicated on multiple machines.
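A single-machine sketch of the MapReduce programming model (the two callbacks are described in detail right after this sketch) - a word count with explicit map, shuffle (group by key) and reduce steps; real MapReduce shards these phases across machines:

```python
from collections import defaultdict

def mapper(line):
    """Called once per input record - emits (key, value) pairs."""
    for word in line.split():
        yield word.lower(), 1

def reducer(word, counts):
    """Called once per key, with all the values collected for that key."""
    return word, sum(counts)

documents = ["the quick brown fox", "the lazy dog", "the fox"]

# The shuffle: group all mapper output by key (the framework's job in
# Hadoop - it sorts mapper output and streams it to the right reducer).
grouped = defaultdict(list)
for line in documents:
    for key, value in mapper(line):
        grouped[key].append(value)

word_counts = dict(reducer(key, values) for key, values in grouped.items())
print(word_counts["the"])  # 3
```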
To create a MapReduce job, you need to implement 2 callback functions:

- mapper - called once for every input record; its job is to extract the key and value from the input record
- reducer - the framework takes the key-value pairs produced by the mappers, collects all the values belonging to the same key and calls the reducer with an iterator over that collection of values

Principle:

> Put the computation near the data - it saves copying the input files over the network, reducing network load and increasing locality.

In order to achieve good throughput in batch processing, the computation must be as local to one machine as possible.

HDFS is somewhat like a distributed version of UNIX, where HDFS is the filesystem and MapReduce is a quirky implementation of a UNIX process.

When MapReduce was published, it was not all new - some concepts were already known, e.g. from massively parallel processing (MPP) databases.

Hadoop vs distributed (MPP) databases:

- databases require you to structure data according to a particular model, whereas files in a distributed filesystem are just byte sequences. Hadoop opened up the possibility of indiscriminately dumping data into HDFS and only later figuring out how to process it further. MPP databases require careful, up-front modeling of the data. Hadoop has often been used for implementing ETL processes: MapReduce jobs are written to clean up the data, transform it into a relational form and import it into an MPP data warehouse for analytic purposes.
- MPP databases are great because they take care of storage, query planning and execution, and moreover they use SQL - a powerful query language. On the other hand, not all kinds of processing can be sensibly expressed as SQL queries (recommendation systems, full-text search or image analysis). MapReduce gave engineers the ability to easily run their own code over large datasets.
- MPP databases and MapReduce take different approaches to handling faults and to the use of memory and disk. Batch processes are less sensitive to faults than online systems, because they do not immediately affect users if they fail, and they can always be run again. If a node fails, most MPP databases abort the entire query; MapReduce can tolerate the failure of an individual map or reduce task. MapReduce writes partial results to disk, so they can be restored after a failure. MPP databases are more willing to keep data in memory for faster access. MapReduce is designed to tolerate frequent unexpected task termination - not because the hardware is unreliable, but because the freedom to arbitrarily terminate processes enables better resource utilisation in a computing cluster (this design was driven by Google's resource-usage patterns).

MapReduce is just one of many possible programming models for distributed systems. MapReduce has a problem with *materialisation* of intermediate state - the process of writing out intermediate state files. Several new execution engines for distributed batch processing (dataflow engines) - Spark, Tez, Flink - were developed to fix this problem with MapReduce.

Dataflow engines provide several options for connecting one operator's output to another's input: repartition and sort by key; take several inputs and partition them, but skip the sorting (for broadcast hash joins); or send the same output from one operator to all partitions of the join operator.

Systems like Dryad and Nephele offer several advantages compared to the MapReduce model:
- expensive work (e.g. sorting) is only performed in places where it is actually required
- no unnecessary map tasks
- intermediate state between operators is kept in memory or written to local disk
- operators can start executing as soon as their input is ready
- existing JVMs can be reused to run new operators

Fully materialising intermediate state to a distributed filesystem makes fault tolerance fairly easy in MapReduce. Spark, Flink and Tez avoid writing intermediate state to HDFS. MapReduce is like writing the output of each command to a temporary file, whereas dataflow engines look much more like UNIX pipes (the final result still might be saved to HDFS).

High-level APIs like Hive, Pig, Cascading and Crunch became popular because programming raw MapReduce jobs is quite laborious.

## Chapter 11: Stream Processing

> Complex systems always evolve from simple systems that work. A complex system designed from scratch never works and
> cannot be made to work.

Batch processing must artificially divide data into chunks of fixed duration (for example: processing a day's worth of data at the end of every day). The problem with daily batch processes is that changes in the input are only reflected in the output a day later, which is too slow for many impatient users. The delay can be reduced by running the processing more frequently.

Stream processing - processing every event as it happens. "Stream" refers to data that is incrementally made available over time.

Event - a small, self-contained, immutable object containing the details of something that happened at some point in time. An event usually contains a timestamp indicating when it happened (according to a time-of-day clock). Related events are usually grouped together into a topic or stream.

Polling the datastore to check for events that have appeared since the last poll becomes expensive if the datastore is not designed for this kind of usage. It is better for consumers to be notified when new events appear. The common approach for notifying consumers about new events is a messaging system: a producer sends a message containing the event, which is then pushed to consumers.

Direct messaging - direct communication between producers and consumers, without intermediary nodes. Brokerless libraries: ZeroMQ and nanomsg - pub/sub messaging over TCP or IP multicast. StatsD and Brubeck use unreliable UDP messaging for collecting metrics from all machines on the network and monitoring them. Webhooks - a pattern in which a callback URL of one service is registered with another service, which makes a request to that URL whenever an event occurs.

Message brokers - a kind of database that is optimised for handling message streams. A broker runs as a server, with producers and consumers connecting to it as clients. Producers write messages; consumers receive them by reading them from the broker. By centralising the data in the broker, these systems can more easily tolerate clients that come and go. A consequence of queueing is also that consumers are generally asynchronous: when a producer sends a message, it normally only waits for the broker to confirm that it has buffered the message - it does not wait for the message to be consumed.

Multiple consumers - when multiple consumers read messages from the same topic, two main patterns of messaging are used:

- load balancing - each message is delivered to one of the consumers, so the consumers can share the work of processing the messages in the topic.
  This pattern is useful when the messages are expensive to process, and you want to be able to add consumers to parallelise the processing.
- fan-out - each message is delivered to all the consumers - the equivalent of having several batch jobs that read the same input file.

Message brokers use acknowledgements: a client must explicitly tell the broker when it has finished processing a message, so that the broker can remove it from the queue. Messages can be delivered out of order when, for example, a network problem causes a lost acknowledgement and a redelivery.

Log-based message brokers - the durable storage approach of databases combined with the low-latency notification facilities of messaging. A log is simply an append-only sequence of records on disk. A producer sends a message by appending it to the end of the log, and a consumer receives messages by reading the log sequentially. In order to scale to higher throughput than a single disk can offer, the log can be partitioned; different partitions can be hosted on different machines. A topic can then be defined as a group of partitions that carry messages of the same type.

Apache Kafka, Amazon Kinesis Streams and Twitter's DistributedLog are log-based message brokers. Google Cloud Pub/Sub is architecturally similar but exposes a JMS-style API rather than a log abstraction. Even though these message brokers write all messages to disk, they are able to achieve a throughput of millions of messages per second by partitioning across multiple machines. The log-based approach trivially supports fan-out messaging.

Change Data Capture (CDC) - the process of observing all data changes written to a database and extracting them in a form in which they can be replicated to other systems. You can capture the changes in a database and continually apply the same changes to, for example, a search index.

Event Sourcing - storing all changes to the application state as a log of change events. Events are designed to reflect things that happened at the application level, rather than low-level state changes. It is a powerful technique for data modeling: from an application point of view, it is more meaningful to record the user's actions as immutable events, rather than recording the effect of those actions on a mutable database: "student cancelled their course enrolment" vs "one entry was deleted from the enrolments table". Event Store is a specialised database built to support applications using event sourcing. Applications that use event sourcing typically have some mechanism for storing snapshots of the current state derived from the log of events, so they don't need to repeatedly reprocess the full log.

CQRS - Command Query Responsibility Segregation - separating the form in which data is written from the forms in which it is read, by allowing several read views.

Streams can be used to produce other, derived streams. Stream processing has long been used for monitoring purposes: fraud detection, trading systems examining price changes, machine monitoring, monitoring in the military.

Complex Event Processing (CEP) - an approach developed in the 1990s for analysing event streams, especially geared toward applications that require searching for certain event patterns. CEP allows you to specify rules to search for certain patterns of events in a stream. CEP systems use a high-level declarative query language like SQL, or a GUI. Stream processing is also used for analytics on streams; the boundary between CEP and stream analytics is blurry.
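A toy sketch of the event-sourcing idea above - current state derived by folding immutable events, using the enrolment example (all class and student names are made up):

```python
from dataclasses import dataclass

# Immutable events capture what happened at the application level.
@dataclass(frozen=True)
class EnrolmentAdded:
    student: str
    course: str

@dataclass(frozen=True)
class EnrolmentCancelled:
    student: str
    course: str

def apply(state, event):
    """Fold one event into the derived read view (a set of enrolments)."""
    if isinstance(event, EnrolmentAdded):
        return state | {(event.student, event.course)}
    if isinstance(event, EnrolmentCancelled):
        return state - {(event.student, event.course)}
    return state

log = [
    EnrolmentAdded("alice", "databases"),
    EnrolmentAdded("alice", "compilers"),
    EnrolmentCancelled("alice", "compilers"),  # the cancellation itself is kept
]

state = set()
for event in log:  # replaying the full log; real systems snapshot periodically
    state = apply(state, event)
print(state)  # {('alice', 'databases')}
```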
Stream-processing frameworks: Apache Storm, Spark Streaming, Flink, Concord, Samza, Kafka Streams, Google Cloud Dataflow, Azure Stream Analytics.

Types of time windows:

- tumbling window - has a fixed length, and every event belongs to exactly one window. For example, with a 1-minute tumbling window, events with timestamps between 10:03:00 and 10:03:59 are grouped into one window.
- hopping window - has a fixed length, but allows windows to overlap in order to provide some smoothing.
- sliding window - contains all the events that occur within some interval of each other. For example, a 5-minute sliding window would cover events at 10:03:39 and 10:08:12, because they are less than 5 minutes apart.
- session window - has no fixed duration; instead, it is defined by grouping together all events for the same user that occur closely together in time, and the window ends when the user has been inactive for some time.

Types of stream joins:

- stream-stream join (window join) - you need to choose a suitable window for the join (seconds, days, weeks between events); also be careful about the ordering of received events.
- stream-table join (stream enrichment) - to perform this join, the stream processor needs to look at one activity event at a time and look up something in the database (local or remote).
- table-table join (materialised view maintenance) - the Twitter example: when a user wants to see their feed, it is too expensive to load all followed profiles' most recent tweets; instead we want a timeline cache, so reading is a simple lookup. To implement cache maintenance (append new tweets to the cache, remove deleted ones, ...) you need streams of events for tweets.

If events on different streams happen around a similar time, in which order are they processed? If the ordering of events across streams is undetermined, the join becomes nondeterministic, which means you cannot rerun the same job on the same input and get the same result. In data warehouses, this issue is known as a slowly changing dimension (SCD). It is often addressed by using a unique identifier for a particular version of the joined record.

Batch processing frameworks can tolerate faults fairly easily. In stream processing, fault tolerance is less straightforward to handle. Possible approaches:

- microbatching and checkpointing - break the stream into small blocks, and treat each block like a miniature batch process (used in Spark Streaming, with batches approx. 1 second long). Apache Flink periodically generates rolling checkpoints of state and writes them to durable storage.
- atomic commit revisited - in order to give the appearance of exactly-once processing in the presence of faults, we need to ensure that all outputs and side effects of processing take effect if and only if the processing is successful - exactly-once message processing in the context of distributed transactions and two-phase commit.
- idempotence - the goal is to discard the partial output of any failed tasks so that they can be safely retried without taking effect twice. Distributed transactions are one way of achieving this, but another way is to rely on idempotence. An idempotent operation is one that you can perform multiple times, and it has the same effect as if you performed it only once (e.g. setting a key in a key-value store is idempotent; incrementing a counter is not). Even if an operation is not naturally idempotent, it can often be made idempotent with a bit of extra metadata - see the sketch below.
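A small sketch of that metadata trick, with made-up message IDs: a counter increment (not idempotent) made idempotent by remembering which messages have already been applied:

```python
class IdempotentCounter:
    """Increment becomes idempotent by tracking processed message IDs."""

    def __init__(self):
        self.value = 0
        self.seen = set()  # the "bit of extra metadata"

    def increment(self, message_id):
        if message_id in self.seen:
            return  # redelivered message - applying it again would double-count
        self.seen.add(message_id)
        self.value += 1

counter = IdempotentCounter()
counter.increment("msg-1")
counter.increment("msg-2")
counter.increment("msg-1")  # broker redelivery after a lost acknowledgement
print(counter.value)  # 2, not 3
```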
## Chapter 12: The Future of Data Systems

The lambda architecture - incoming data should be recorded by appending immutable events to an always-growing dataset, similarly to event sourcing. From these events, read-optimised views are derived. The lambda architecture proposes running two different systems in parallel: the stream processor consumes the events and quickly produces an approximate update to the view, while the batch processor later consumes the same set of events and produces a corrected version of the derived view.

Federated databases - unifying reads - it is possible to provide a unified query interface to a wide variety of underlying storage engines and processing methods - an approach known as a federated database or polystore.

Unbundled databases - unifying writes - making it easier to reliably plug together storage systems is like unbundling a database's index-maintenance features in a way that can synchronise writes across disparate technologies.

Hardware is not quite the perfect abstraction that it may seem. Random bit-flips are very rare on modern hardware, but they can happen. Even software like MySQL or PostgreSQL can have bugs. Large-scale storage systems like HDFS or Amazon S3 do not fully trust disks: they run background processes that continually read back files, compare them to other replicas, and move files from one disk to another, in order to mitigate the risk of silent corruption.

ACID databases have led us toward developing applications on the basis of blindly trusting technology. Since the technology we trusted worked well enough most of the time, auditing mechanisms were deemed not worth the investment. Having continuous end-to-end integrity checks gives you increased confidence about the correctness of your systems, which in turn allows you to move faster (much like automated testing).

It is not sufficient for software engineers to focus exclusively on the technology and ignore its ethical consequences. Users are humans, and human dignity is paramount.

Algorithmic prison - systematically being excluded from jobs, air travel, insurance coverage, property rentals, financial services, ... because an algorithm said NO. In countries that respect human rights, the criminal justice system presumes innocence until proven guilty; automated systems, on the other hand, can systematically exclude a person from participating in society without any proof of guilt and with little chance of appeal.

Decisions made by an algorithm are not necessarily any better or worse than those made by a human. Every person is likely to have biases. In many countries, anti-discrimination laws prohibit treating people differently depending on protected traits (ethnicity, age, gender, sexuality, disability, beliefs). Automated decision-making opens the question of responsibility and accountability: who is responsible if a self-driving car causes an accident?

Besides the problems of predictive analytics, there are ethical problems with data collection itself. Thought experiment: whenever you see the word "data" (e.g. data-driven company), replace it with the word "surveillance" (e.g. surveillance-driven company). Even the most totalitarian and oppressive regimes could only dream of putting a microphone in every room and forcing every person to constantly carry a device capable of tracking their location and movements.
Declining to use a service due to its tracking of users is only an option for the small number of people who are privileged enough to have the time and knowledge to understand its privacy policy, and who can afford to potentially miss out on social participation opportunities. When collecting data, we need to consider not just today's political environments, but all possible future governments.

================================================
FILE: books/docker-deep-dive.md
================================================
[go back](https://github.com/pkardas/learning)

# Docker Deep Dive

Book by Nigel Poulton

================================================
FILE: books/elixir.md
================================================
[go back](https://github.com/pkardas/learning)

# Elixir in Action

Book by Saša Jurić

================================================
FILE: books/fundamentals-of-architecture.md
================================================
[go back](https://github.com/pkardas/learning)

# Fundamentals of Software Architecture: An Engineering Approach

Book by Mark Richards and Neal Ford

- [Preface: Invalidating Axioms](#preface-invalidating-axioms)
- [Chapter 1: Introduction](#chapter-1-introduction)
- [Chapter 2: Architectural thinking](#chapter-2-architectural-thinking)
- [Chapter 3: Modularity](#chapter-3-modularity)
- [Chapter 4: Architecture Characteristics Defined](#chapter-4-architecture-characteristics-defined)
- [Chapter 5: Identifying Architectural Characteristics](#chapter-5-identifying-architectural-characteristics)
- [Chapter 6: Measuring and Governing Architecture Characteristics](#chapter-6-measuring-and-governing-architecture-characteristics)
- [Chapter 7: Scope of Architecture Characteristics](#chapter-7-scope-of-architecture-characteristics)
- [Chapter 8: Component-Based Thinking](#chapter-8-component-based-thinking)
- [Chapter 9: Foundations](#chapter-9-foundations)
- [Chapter 10: Layered Architecture Style](#chapter-10-layered-architecture-style)
- [Chapter 11: Pipeline Architecture Style](#chapter-11-pipeline-architecture-style)
- [Chapter 12: Microkernel Architecture Style](#chapter-12-microkernel-architecture-style)
- [Chapter 13: Service-Based Architecture Style](#chapter-13-service-based-architecture-style)
- [Chapter 14: Event-Driven Architecture Style](#chapter-14-event-driven-architecture-style)
- [Chapter 15: Space-Driven Architecture Style](#chapter-15-space-driven-architecture-style)
- [Chapter 16: Orchestration-Driven Service-Oriented Architecture](#chapter-16-orchestration-driven-service-oriented-architecture)
- [Chapter 17: Microservices Architecture](#chapter-17-microservices-architecture)
- [Chapter 18: Choosing the Appropriate Architecture Style](#chapter-18-choosing-the-appropriate-architecture-style)
- [Chapter 19: Architecture Decisions](#chapter-19-architecture-decisions)
- [Chapter 20: Analyzing Architecture Risk](#chapter-20-analyzing-architecture-risk)
- [Chapter 21: Diagramming and Presenting Architecture](#chapter-21-diagramming-and-presenting-architecture)
- [Chapter 22: Making Teams Effective](#chapter-22-making-teams-effective)
- [Chapter 23: Negotiation and Leadership Skills](#chapter-23-negotiation-and-leadership-skills)
- [Chapter 24: Developing a Career Path](#chapter-24-developing-a-career-path)
- [Self-Assessment Questions](#self-assessment-questions)

## Preface: Invalidating Axioms

> Axiom - A statement or proposition which is regarded as being established, accepted, or self-evidently true.
Software architects (like mathematicians) also build theories atop axioms (but the software world is _softer_ than mathematics). Architects have an important responsibility to question assumptions and axioms left over from previous eras. Each new era requires new practices, tools, measurements, patterns, and a host of other changes.

## Chapter 1: Introduction

The industry does not have a good definition of software architecture.

> Architecture is about the important stuff... whatever that is ~ Ralph Johnson

The responsibilities of a software architect encompass technical abilities, soft skills, operational awareness, and a host of others.

When studying architecture, keep in mind that everything can be understood in context - the reasons certain decisions were made were based on the realities of the environment (for example, building a microservice architecture in 2002 would have been inconceivably expensive).

Knowledge of the architecture structure, architecture characteristics, architecture decisions, and design principles is needed to fully understand the architecture of a system:
- structure/style: microservices, layered, microkernel, ...
- characteristics: availability, reliability, scalability, fault tolerance, security, ...
- decisions: what is and what is not allowed - rules for how a system should be constructed
- design principles: guidelines for constructing systems, e.g. leverage async messaging between services to increase performance

Expectations of an architect:
- make architecture decisions - instead of making technical decisions (use React.js), instruct development teams (use a reactive-based framework)
- continually analyze the architecture - validate decisions made years ago in order to prevent structural decay
- keep current with the latest trends - the decisions an architect makes tend to be long-lasting and difficult to change; understanding and following key trends helps the architect prepare for the future
- ensure compliance with decisions - continually verify that development teams are following the architecture decisions and design principles defined
- diverse exposure and experience - an architect should be at least familiar with a variety of technologies; an effective architect should be aggressive in seeking out opportunities to gain experience in multiple languages, platforms, and technologies
- have business domain knowledge - without business knowledge, an architect cannot communicate with stakeholders and business users and will quickly lose credibility
- possess interpersonal skills - including teamwork, facilitation, and leadership; engineers love to solve technical problems, however G. Weinberg said: "no matter what they tell you, it is always a people problem" - many architects are excellent technologists but ineffective architects because of poor communication skills
- understand and navigate politics - have negotiation skills; almost every decision an architect makes will be challenged

> All architectures become iterative because of _unknown unknowns_. Agile just recognizes this and does it sooner.

An iterative process fits the nature of software architecture. Trying to build a modern system such as microservices using Waterfall will create a great deal of friction. Nothing remains static. What we need is _evolutionary architecture_ - mutate the solution, evolve new solutions iteratively. Adopting Agile engineering practices (continuous integration, automated machine provisioning, ...) makes building resilient architectures easier.
Agile methodologies support change better than planning-heavy processes because of their tight feedback loop.

Laws of Software Architecture:
- Everything in software architecture is a trade-off.
- If an architect thinks they have discovered something that isn't a trade-off, more likely they just haven't identified the trade-off yet.
- Why is more important than how.

## Chapter 2: Architectural thinking

4 main aspects of thinking like an architect:
1. understanding the difference between architecture and design
   - architecture: defining architecture characteristics, selecting architecture patterns, creating components
   - design: class diagrams, user interface, code testing and development
   - architects and development teams have to form a strong bidirectional relationship and be on the same virtual team
   - where does architecture end and design begin? nowhere - architecture and design must be synchronized through tight collaboration
2. a wide breadth of technical knowledge
   - developer - a significant amount of technical depth - specialised in languages, frameworks, and tools
   - architect - a significant amount of technical breadth - a broad understanding of technology and how to use it to solve particular problems
3. understanding, analyzing, and reconciling trade-offs between various solutions and technologies
   - thinking like an architect is all about seeing trade-offs in every solution
   - the ultimate answer to architectural questions: _it depends on ..._ (budget, business environment, company culture, ...)
   - look at the benefits of a given solution, but also analyze the negatives
   - analyze the trade-offs and then ask what is more important; the answer always depends on the environment
4. understanding the importance of business drivers
   - understanding business drivers is required for the success of the system
   - requires domain knowledge and the ability to translate business requirements into architecture characteristics

_Frozen Caveman Anti-Pattern_: describes an architect who always reverts to their pet irrational concern for every architecture. This anti-pattern manifests in architects who have been burned in the past by a poor decision or unexpected occurrence, making them particularly cautious in the future.

How can an architect keep their hands-on coding skills?
- do frequent proofs-of-concept - whenever possible, write production-quality code (even when doing POCs) - POC code often remains in the repository and becomes the reference or guiding example
- tackle technical debt stories or architecture stories, freeing the development team up to work on the critical functional user stories
- work on bug fixes
- create simple command-line tools and analyzers to help the development team with their day-to-day tasks
- do code reviews frequently

## Chapter 3: Modularity

Modularity is an organizing principle. If an architect designs a system without paying attention to how the pieces wire together, they end up creating a system that presents myriad difficulties. Developers typically use modules as a way to group related code together. For discussions about architecture, we use modularity as a general term to denote a related grouping of code: classes, functions, or any other grouping.

_Cohesion_ - refers to what extent the parts of a module should be contained within the same module. It is a measure of how related the parts are to one another.

_Abstractness_ - the ratio of abstract artifacts to concrete artifacts. It represents a measure of abstractness versus implementation.
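A rough illustration of the metric with hypothetical classes; one common formulation divides the abstract artifacts by the total count:

```python
import abc
import inspect

class Repository(abc.ABC):  # abstract artifact
    @abc.abstractmethod
    def get(self, key): ...

class SqlRepository(Repository):  # concrete artifact
    def get(self, key):
        return None

def abstractness(classes):
    # Share of abstract artifacts among all artifacts in the code base.
    abstract = sum(1 for cls in classes if inspect.isabstract(cls))
    return abstract / len(classes)

print(abstractness([Repository, SqlRepository]))  # 0.5
```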
Both extremes are problematic: a code base with no abstractions, and a code base with too many abstractions.

## Chapter 4: Architecture Characteristics Defined

Architects may collaborate on defining the domain or business requirements, but one key responsibility entails defining, discovering, and analyzing all the things the software must do that aren't directly related to the domain functionality - the architectural characteristics.

Operational Architecture Characteristics:
- Availability - how long the system will need to be available
- Continuity - disaster recovery capability
- Performance - stress testing, peak analysis
- Recoverability - how quickly is the system required to be online again?
- Reliability - if it fails, will it cost the company large sums of money?
- Robustness - ability to handle error and boundary conditions while running
- Scalability - ability for the system to perform and operate as the number of users/requests increases

Structural Architecture Characteristics:
- Configurability - ability for the end users to easily change aspects of the software's configuration
- Extensibility - how important it is to plug in new pieces of functionality
- Installability - ease of system installation on all necessary platforms
- Localization - support for multiple languages, currencies, measures
- Maintainability - how easy is it to apply changes and enhance the system?
- Portability - does the system need to run on more than one platform?
- Supportability - what level of technical support is needed by the application?
- Upgradeability - ability to quickly upgrade from a previous version

Cross-cutting Architecture Characteristics:
- Accessibility - access for all users, including those with disabilities
- Archivability - will the data need to be deleted/archived?
- Authentication - security requirements to ensure users are who they say they are
- Authorization - security requirements to ensure users can access only certain functions within the application
- Legal - what legislative constraints is the system operating in?
- Privacy - ability to hide transactions from internal company employees
- Security - does the data need to be encrypted in the database, what type of authentication is needed, ...?
- Supportability - what level of technical support is needed by the application?
- Usability - level of training required for users to achieve their goals with the app

Any list of architecture characteristics will be incomplete. Any given piece of software may require its own important architectural characteristics based on unique factors. Many of the terms are imprecise and ambiguous. No complete list of standards exists.

Applications can support only a few of the architecture characteristics listed above. Firstly, each of the supported characteristics requires design effort. Secondly, each architecture characteristic often has an impact on others. Architects rarely encounter a situation where they are able to design a system that maximizes every single architecture characteristic.

> Never shoot for the best architecture, but rather _the least worst_ architecture.

Too many architecture characteristics lead to generic solutions that try to solve every business problem, and those architectures rarely work because the design becomes unwieldy. Architecture design should be as iterative as possible.
## Chapter 5: Identifying Architectural Characteristics

Identifying the correct architectural characteristics for a given problem requires an architect not only to understand the domain problem, but also to collaborate with the problem domain stakeholders to determine what is truly important from a domain perspective.

_Extracting architecture characteristics from domain concerns_: translate domain concerns to identify the right architectural characteristics. Do not design a generic architecture; focus on a short list of characteristics. Too many characteristics lead to greater and greater complexity. Keep the design simple. Instead of prioritizing characteristics, have the domain stakeholders select the top 3 most important characteristics from the final list.

Translation of domain concerns to architecture characteristics:
- Mergers and acquisitions -> interoperability, scalability, adaptability, extensibility
- Time to market -> agility, testability, deployability
- User satisfaction -> performance, availability, fault tolerance, testability, deployability, agility, security
- Competitive advantage -> agility, testability, deployability, scalability, availability, fault tolerance
- Time and budget -> simplicity, feasibility

_Extracting architecture characteristics from requirements_: some characteristics come from explicit statements in requirements.

Architecture Katas - in order to become a great architect you need practice. A Kata exercise provides architects with a problem stated in domain terms (description, users, requirements) and additional context. Small teams work 45 minutes on a design, then show the results to the other groups, who vote on who came up with the best architecture. Team members ideally get feedback from an experienced architect about missed trade-offs and alternative designs.

Explicit characteristics - appear in a requirements specification, e.g. support for a particular number of users. Implicit characteristics - characteristics that aren't specified in requirements documents, yet they make up an important aspect of the design, e.g. availability - making sure users can access the website; security - no one wants to create insecure software; ...

Architects must remember: there is no best design in architecture, only a least worst collection of trade-offs.

## Chapter 6: Measuring and Governing Architecture Characteristics

Problems with defining and measuring architecture characteristics:
- They aren't physics - many characteristics have vague meanings; the industry has wildly differing perspectives
- Wildly varying definitions - different people may disagree on a definition; without agreeing on a common one, a proper conversation is difficult
- Too composite - many characteristics comprise many others at a smaller scale

Operational measures: obvious direct measurements, like performance - measure response time. High-level teams don't just establish hard performance numbers; they base their definitions on statistical analysis.

Structural measures: addressing critical aspects of code structure, like cyclomatic complexity - the measurement of code complexity, computed by applying graph theory to code (see the sketch below).

> Overly complex code represents a code smell. It hurts almost every one of the desirable characteristics of code bases (modularity, testability, deployability, ...). Yet if teams don't keep an eye on gradually growing complexity, that complexity will dominate the code base.
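A small illustration: for structured code, cyclomatic complexity can be counted as decision points + 1 (the `Order` type is hypothetical):

```python
from dataclasses import dataclass

@dataclass
class Order:
    weight: float
    express: bool

def shipping_cost(order: Order) -> int:
    # 3 decision points (> 20, > 5, express), so cyclomatic complexity = 4.
    if order.weight > 20:
        cost = 50
    elif order.weight > 5:
        cost = 20
    else:
        cost = 10
    if order.express:
        cost *= 2
    return cost

print(shipping_cost(Order(weight=8, express=True)))  # 40
```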
Process measures: some characteristics intersect with software development processes. For example, agility can relate to the software development process; ease of deployment and testability require some emphasis on good modularity and isolation at the architecture level.

Governing architecture characteristics - for example, ensuring software quality within an organization falls under the heading of architectural governance, because it falls within the scope of architecture, and negligence can lead to disastrous quality problems.

_Architecture fitness function_ - **any mechanism** that provides an objective integrity assessment of some architecture characteristic or combination of architecture characteristics. Many tools may be used to implement fitness functions: metrics, monitors, unit tests, chaos engineering, ... Rather than a heavyweight governance mechanism, fitness functions provide a mechanism for architects to express important architectural principles and automatically verify them (see the sketch below). Developers know that they shouldn't release insecure code, but that priority competes with dozens or hundreds of other priorities for busy developers. Tools like Security Monkey, and fitness functions generally, allow architects to codify important governance checks into the substrate of the architecture.
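A minimal sketch of a fitness function written as an ordinary unit test, assuming a hypothetical `src/domain` / `src/adapters` layout: it verifies that the domain layer never imports from the adapters layer:

```python
import ast
from pathlib import Path

def test_domain_does_not_import_adapters():
    # Walk every module in the (hypothetical) domain layer and
    # inspect its imports.
    for module in Path("src/domain").rglob("*.py"):
        tree = ast.parse(module.read_text())
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                names = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom):
                names = [node.module or ""]
            else:
                continue
            assert not any(name.startswith("src.adapters") for name in names), \
                f"{module} depends on the adapters layer"
```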
## Chapter 7: Scope of Architecture Characteristics

When evaluating many operational architecture characteristics, an architect must consider dependent components outside the code base that will impact those characteristics.

_Connascence_ - two components are connascent if a change in one would require the other to be modified in order to maintain the overall correctness of the system. If two services in a microservices architecture share the definition of some class, they are statically connascent. Dynamic connascence: synchronous - the caller needs to wait for the response from the callee; asynchronous calls allow fire-and-forget semantics in event-driven architectures.

Component-level coupling isn't the only thing that binds software together. Many business concepts semantically bind parts of the system together, creating functional cohesion.

_Architecture quantum_ - an independently deployable artifact with high functional cohesion and synchronous connascence.
- independently deployable - includes all necessary components to function independently from other parts of the architecture (e.g. a database - the system will not function without it)
- high functional cohesion - how well the contained code is unified in purpose, meaning an architecture quantum needs to do something purposeful
- synchronous connascence - synchronous calls within an application context or between the distributed services that form this architecture quantum

## Chapter 8: Component-Based Thinking

Architects typically think in terms of components, the physical manifestation of a module. Typically, the architect defines, refines, manages, and governs components within an architecture.

Architecture Partitioning - several styles exist, with different sets of trade-offs (layered architecture, modular monolith).

> Conway's Law: Organizations which design systems ... are constrained to produce designs which are copies of the communication structures of these organizations.

This law suggests that when a group of people designs some technical artifact, the communication structures between the people end up replicated in the design.

Technical partitioning - organizing components by technical capabilities (presentation, business rules, persistence). Domain partitioning - modeling by identifying domains/workflows that are independent and decoupled from one another. Microservices are based on this philosophy.

Developers should never take components designed by architects as the last word. All software design benefits from iteration. The initial design should be viewed as a first draft.

Component identification flow:
- identify initial components
- assign requirements to components
- analyze roles and responsibilities
- analyze architecture characteristics
- restructure components

Finding the proper granularity for components is one of the most difficult tasks. A too fine-grained design leads to too much communication between components; a too coarse-grained design encourages high internal coupling.

Discovering components:
- entity trap - an anti-pattern where an architect bases components directly on database relationships; it indicates a lack of thought about the actual workflows of the application
- actor-actions approach - a popular way to map requirements to components: identify the actors who perform activities with the application and the actions those actors may perform
- event storming - the architect assumes the project will use messages and/or events to communicate between components; the team tries to determine which events occur in the system based on requirements and identified roles, and builds components around those event and message handlers
- workflow approach - identifies the key roles and the kinds of workflows, and builds components around the identified activities

Monolithic vs Distributed Architecture:
- monolithic: a single deployable unit containing all functionality of the system, running in one process and typically connected to a single database
- distributed: multiple services running in their own ecosystems, communicating via the network; each service may have its own release cadence and engineering practices

## Chapter 9: Foundations

Architecture styles (a.k.a. architecture patterns) - describe a named relationship of components covering a variety of architecture characteristics. A style name, similar to design patterns, creates a single name that acts as shorthand between experienced architects.

Big Ball of Mud - the absence of any discernible architecture structure. The lack of structure makes change increasingly difficult: problematic testing, deployment, scalability, performance, ... A mess caused by the lack of governance around code quality and structure.

Client/Server - separation of responsibilities - backend-frontend/two-tier/client-server.

Architecture styles can be classified into 2 main types:
- monolithic - a single deployment unit of code - layered, pipeline, microkernel
- distributed - multiple deployment units connected through a network - service-based, event-driven, space-based, service-oriented, microservices - much more powerful in terms of performance, scalability, and availability, but there are trade-offs

_The Fallacies of Distributed Computing:_
1. The Network is Reliable - fact: networks still remain generally unreliable; this is why things like timeouts and circuit breakers exist between services. The more a system relies on the network, the potentially less reliable it becomes.
2. Latency is Zero - a local call is measured in nanoseconds/microseconds; the same call made through a remote access protocol is measured in milliseconds. Do you know what the average round-trip latency is for a RESTful call in your prod env?
3. Bandwidth is Infinite - communication between remote services significantly utilizes bandwidth, causing networks to slow down. Imagine 2,000 req/s at 500 KB each = 1 GB per second! Ensuring that a minimal amount of data is passed between services in a distributed architecture is the best way to address this fallacy.
4. The Network is Secure - the surface area for threats and attacks increases by magnitudes when moving from a monolithic to a distributed architecture, despite measures like VPNs, trusted networks, and firewalls.
5. The Topology Never Changes - the network topology (routers, hubs, switches, firewalls, networks, appliances) CAN change; architects must be in constant communication with operations and network administrators to know what is changing and when, so they can make adjustments.
6. There is Only One Administrator - this fallacy points to the complexity of distributed architecture and the amount of coordination that must happen to get everything working correctly. Monoliths do not require this level of communication and collaboration, thanks to the single deployment unit.
7. Transport Cost is Zero - transport cost does not refer to latency, but rather to the actual cost in money associated with making a simple RESTful call. Distributed architectures cost significantly more than monolithic architectures, primarily due to the increased need for additional hardware, servers, gateways, firewalls, subnets, proxies, ...
8. The Network is Homogenous - the network is not made up of hardware from a single vendor, and not all heterogeneous hardware vendors play well together.

Other distributed considerations:
- distributed logging - debugging in a distributed architecture is very difficult and time-consuming; logging consolidation tools may help
- distributed transactions - in a monolith it is super easy to perform `commit`/`rollback`; it is much more difficult to do the same in a distributed system. Distributed systems rely on eventual consistency - this is one of the trade-offs. Transactional SAGAs are one way to manage distributed transactions.
- contract maintenance and versioning - a contract is behaviour and data agreed upon by both the client and the service; maintenance is hard due to decoupled services owned by different teams and departments

## Chapter 10: Layered Architecture Style

The Layered Architecture (n-tiered) - the standard for most applications because of its simplicity, familiarity, and low cost. The style also falls into several architectural anti-patterns (architecture by implication, accidental architecture). Most layered architectures consist of 4 standard layers: presentation, business, persistence, and database.

The layered architecture is a technically partitioned architecture (as opposed to a domain-partitioned architecture). Groups of components, rather than being grouped by domain, are grouped by their technical role in the architecture. As a result, any particular business domain is spread throughout all of the layers of the architecture. Domain-driven design does not work well with the layered architecture style.

Each layer can be either closed or open:
- closed - a request moves top-down from layer to layer; the request cannot skip any layers
- open - the request can bypass layers (fast-lane reader pattern)

The layers of isolation - changes made in one layer of the architecture generally don't impact components in other layers.
Each layer is independent of the other layers, thereby having little or no knowledge of the inner workings of other layers in the architecture. Violation of this concept produces a very tightly coupled application with layer interdependencies between components. This type of architecture becomes very brittle, difficult, and expensive to change.

This architecture makes a good starting point for most applications when it is not yet known exactly which architecture will ultimately be used. Be sure to keep reuse at a minimum and keep object hierarchies shallow. A good level of modularity will help facilitate the move to another architecture style later on.

Watch out for the architecture sinkhole anti-pattern - it occurs when requests move from one layer to another as simple pass-through processing, with no business logic performed within each layer. For example, the presentation layer responds to a simple request from the user to retrieve basic customer data.

## Chapter 11: Pipeline Architecture Style

Pipeline (a.k.a. pipes and filters) architecture: _Filter -(Pipe)-> Filter -(Pipe)-> Filter -(Pipe)-> Filter_
- pipes - the communication channels between filters; each pipe is usually unidirectional and point-to-point
- filters - self-contained, independent from other filters, stateless; each should perform one task only

4 types of filters exist within this architecture style:
- producer - the starting point of a process, sometimes called the source
- transformer - accepts input, optionally performs a transformation on the data, then forwards it to the outbound pipe; also known as "map"
- tester - accepts input, tests criteria, then optionally produces output; also known as "reduce"
- consumer - the termination point for the pipeline flow; persists or displays the final result

ETL tools leverage the pipeline architecture for the flow and modification of data from one database to another.

## Chapter 12: Microkernel Architecture Style

The microkernel architecture style (a.k.a. plug-in) - a relatively simple monolithic architecture consisting of two components: a core system and plug-in components.

Core system - the minimal functionality required to run the system. Depending on the size and complexity, the core system can be implemented as a layered architecture or a modular monolith.

Plug-in components - standalone, independent components that contain specialized processing, additional features, and custom code meant to enhance or extend the core system. Additionally, they can be used to isolate highly volatile code, creating better maintainability and testability within the application. Plug-in components should have no dependencies between them.

Plug-ins do not always have to communicate point-to-point with the core system (REST or messaging can be used instead). Each plug-in can be a standalone service (or even a microservice) - this topology is still only a single architecture quantum due to the monolithic core system.

Plug-in Registry - the core system needs to know which plug-in modules are available and how to get them. The registry contains information about each plug-in (name, data contract, remote access protocol). The registry can be as simple as an internal map structure owned by the core system (see the sketch below), or as complex as a registry and discovery tool (like ZooKeeper or Consul).

Examples of usages: Eclipse IDE, JIRA, Jenkins, Internet web browsers, ... Problems that require different configurations for each location or client match extremely well with this architecture style. Another example is a product that places a strong emphasis on user customization and feature extensibility.
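A minimal sketch of the internal-map variant of a plug-in registry; plug-in names and the callable contract are illustrative:

```python
# The core system owns a simple map from plug-in name to implementation.
registry = {}

def register(name, plugin):
    registry[name] = plugin

def core_handle(request):
    # The core provides only minimal functionality and delegates
    # specialised processing to whichever plug-ins are installed.
    plugin = registry.get(request["type"])
    if plugin is None:
        return "no plug-in installed for this request type"
    return plugin(request)

# A standalone plug-in with no dependencies on other plug-ins:
register("discount", lambda request: request["amount"] * 0.25)

print(core_handle({"type": "discount", "amount": 100}))  # 25.0
```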
## Chapter 13: Service-Based Architecture Style

A hybrid of the microservices architecture and one of the most pragmatic architecture styles (flexible, simpler, and cheaper than microservices/event-driven services).

Topology: a distributed macro layered structure consisting of a separately deployed user interface, separately deployed coarse-grained services (domain services), and a monolithic database. Because the services typically share a single monolithic database, the number of services within an application context ranges between 4 and 12. Depending on scalability, fault tolerance, and throughput needs, multiple instances of a domain service can exist; multiple instances require some load balancing.

Many variants exist within the service-based architecture:
- single monolithic user interface
- domain-based user interface
- service-based user interface

Similarly, you can break apart the single monolithic database, going as far as domain-scoped databases.

Service-based architecture uses a centrally shared database. Because of the small number of services, database connections are not usually an issue. Database changes, however, can be an issue: if not done properly, a table schema change can impact every service, making database changes a very costly task in terms of effort and coordination. One way to mitigate the impact and risk of database changes is to logically partition the database and manifest the logical partitioning through federated shared libraries. Changes to a table within a particular logical domain then impact only those services using that shared library. When making changes to shared tables, lock the common entity objects and restrict change access to the database team only. This helps control change and emphasizes the significance of changes to the common tables used by all services.

Service-based architecture - one of the most pragmatic architecture styles, a natural fit when doing DDD; it preserves ACID better than any other distributed architecture and offers a good level of architectural modularity.

## Chapter 14: Event-Driven Architecture Style

A popular distributed asynchronous architecture style used to produce highly scalable and high-performance apps. It can be used for small applications as well as large, complex ones. Made up of decoupled event-processing components that asynchronously receive and process events. It can be used as a standalone style or embedded within other architecture styles (e.g. an event-driven microservices architecture).
2 primary topologies:
- the mediator topology - used when you require control over the workflow of an event process
  - an event mediator - manages and controls the workflow for initiating events that require the coordination of multiple event processors; usually there are multiple mediators (each associated with a particular domain)
  - if an error occurs (no acknowledgement from one of the event processors), the mediator can take corrective action to fix the problem
  - the mediator controls the workflow; it can maintain the event state and manage errors
  - operates on commands (send-email, fulfill-order) rather than on events (email-sent, order-fulfilled)
  - cons: not as highly decoupled as the broker topology, lower scalability, hard to model complex workflows
- the broker topology - used when you require a high level of responsiveness
  - no central event mediator - message flow is distributed across the event processor components in a chain-like broadcasting fashion
  - a good practice: each event processor advertises what it did to the rest of the system, regardless of whether any other event processor cares about that action
  - operates on events (email-sent, order-fulfilled) rather than on commands (send-email, fulfill-order)
  - cons: challenging error handling - no central monitoring/controlling; not possible to restart a business transaction (because actions are taken asynchronously)

Error handling: the workflow event pattern - leverages delegation, containment, and repair through the use of a workflow delegate. On error, the event consumer immediately delegates the error to the workflow processor and moves on. The workflow processor tries to figure out what is wrong with the message (rules, machine learning, ...); once the message is repaired, it can be sent back to the event processor. In case of a very problematic error, a human agent can determine what is wrong with the message and then re-submit it.

Data loss (lost messages) - a primary concern when dealing with asynchronous communication. Typical data-loss scenarios:
- the message never makes it to the queue, or the broker goes down before the event processor can retrieve the message - solution: leverage persistent message queues (guaranteed delivery), with messages persisted in the broker's database (not only in memory)
- the event processor de-queues the message and crashes before it can process it - solution: _client acknowledge mode_ - the message is not deleted from the broker immediately, but waits for an acknowledgement
- the event processor is unable to persist the message in the database - solution: leverage ACID transactions

Broadcast - the capability to broadcast events without knowledge of who is receiving the message and what they do with it. Broadcasting is perhaps the highest level of decoupling between event processors.

In event-driven architecture, synchronous communication is accomplished through **request-reply** messaging. Each event channel within request-reply messaging has 2 queues (a request queue and a reply queue). 2 primary techniques for implementing request-reply messaging:
1. [PREFERRED] Correlation ID - a field in the reply message, usually set to the request message ID (see the sketch below).
2. Temporary queue - dedicated to the specific request, created when the request is made and deleted when the request ends. Does not require a Correlation ID, but large message volumes can significantly slow down the message broker and impact performance and responsiveness.
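A minimal sketch of the Correlation ID technique, using in-process queues as stand-ins for the request and reply channels:

```python
import queue
import uuid

request_queue = queue.Queue()  # stand-in for the request channel
reply_queue = queue.Queue()    # stand-in for the reply channel

def service():
    # The replier copies the request's ID into the reply's correlation ID.
    message = request_queue.get()
    reply_queue.put({"correlation_id": message["id"],
                     "body": message["body"].upper()})

request_id = str(uuid.uuid4())
request_queue.put({"id": request_id, "body": "hello"})
service()

reply = reply_queue.get()
assert reply["correlation_id"] == request_id  # match reply to its request
print(reply["body"])  # HELLO
```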
Choosing between the two models:
- Request-Based - for well-structured, data-driven requests (e.g. retrieving customer profile data).
- Event-Based - for flexible, action-based events that require a high level of responsiveness and scale, with complex and dynamic processing.

## Chapter 15: Space-Driven Architecture Style

In any high-volume application with a large concurrent load, the database will become a bottleneck, regardless of the caching technologies used. The space-based architecture style is specifically designed to address problems involving high scalability, elasticity, and high concurrency.

_Tuple space_ - the technique of using multiple parallel processors communicating through shared memory. High scalability, elasticity, and performance are achieved by removing the central database and leveraging replicated in-memory data grids. Application data is kept in memory and replicated among all active processing units.

Several architecture components make up a space-based architecture:
- processing unit: contains the application code
  - single or multiple processing units
  - contains an in-memory data grid and replication engine, usually implemented using Hazelcast, Apache Ignite, or Oracle Coherence
- virtualized middleware: used to manage and coordinate the processing units
  - handles the infrastructure concerns (data sync, request handling)
  - made of:
    - messaging grid - manages input requests and session state, determines which active processing components are available to receive a request, and forwards it to one of those processing units (usually implemented using HAProxy and Nginx)
    - data grid - implemented within the processing units as a replicated cache
    - processing grid - (optional component) manages orchestrated request processing when there are multiple processing units involved in a single business request
    - deployment manager - monitors response times and user loads, starts up new processing units when load increases, and shuts them down when the load decreases
- data pumps: used to asynchronously send updated data to the database
  - a way of sending data to another processor, which then updates the database
  - always asynchronous, providing eventual consistency
  - when a processing unit receives a request and updates its cache, that processing unit becomes the owner of the update and is responsible for sending it through the data pump so that the database can be updated eventually
  - implemented using messaging; messages usually contain the new data values (a diff)
- data writers: used to perform the updates from the data pumps
  - accept messages from a data pump and update the database with the information contained in each message
- data readers: used to read database data and deliver it to processing units upon startup
  - responsible for reading data from the database and sending it to the processing units via a reverse data pump
  - invoked in one of 3 situations:
    - a crash of all processing unit instances of the same named cache
    - a redeployment of all processing units within the same named cache
    - retrieving archive data not contained in the replicated cache

Data collision - occurs when data is updated in one cache instance A and, during replication to another cache instance B, the same data is updated in cache B. The local update in B will be overridden by the old data from cache A, and cache A will be overridden by cache B. Data collision rate factors: latency, number of instances, cache size.

_Distributed cache_ - better data consistency. _Replicated cache_ - better performance and fault tolerance.
Example usages of space-based architecture: well suited for applications that experience high spikes in user or request volume and apps with throughput in excess of 10,000 concurrent users - online concert ticketing systems, online auction systems.

## Chapter 16: Orchestration-Driven Service-Oriented Architecture

This style appeared in the late 1990s, when companies were becoming enterprises and architects were forced to reuse as much as possible because of expensive software licenses (no open-source alternatives). Reuse is the dominant philosophy in this architecture.

- Business Services - sit at the top of this architecture and provide the entry point. No code, just input, output, and schema information.
- Enterprise Services - fine-grained, shared implementations
  - atomic behaviours around a particular business domain - CreateCustomer, CalculateQuote, ...
  - meant to be a collection of reusable assets - unfortunately, the dynamic nature of reality defies these attempts
- Application Services - not all services in the architecture require the same level of granularity; these are one-off, single-implementation services, for example functionality a company doesn't want to take the time to make reusable.
- Infrastructure Services - supply the operational concerns - monitoring, logging, auth.
- Orchestration Engine - the heart of this architecture; defines the relationships between the business and enterprise services, how they map together, and where transaction boundaries lie. It also acts as an integration hub, allowing architects to integrate custom code with packaged and legacy software systems.

In practice, this architecture was mostly a disaster. When a team builds a system primarily around reuse, they also incur a huge amount of coupling between components. Each change had a potentially huge ripple effect, which in turn led to the need for coordinated deployments, holistic testing, and other drags on engineering efficiency. This architecture manages to find the disadvantages of both monolithic and distributed architectures!

## Chapter 17: Microservices Architecture

There is no secret group of architects who decide what the next big movement will be; rather, it turns out that many architects end up making common decisions. Microservices differ in this regard - the style was popularized by a famous blog entry by Martin Fowler and James Lewis.

Microservices architecture is heavily inspired by the ideas in DDD - the bounded context, in particular, decidedly inspired microservices. Within a bounded context, the internal parts (code, data schemas) are coupled together to produce work, but they are never coupled to anything outside the bounded context. Each service is expected to include all necessary parts to operate independently.

Performance is often the negative side effect of the distributed nature of microservices: network calls take much longer than method calls. It is advised to avoid transactions across service boundaries, making determining the granularity the key to success in this architecture. It is hard to define the right granularity for services: if there are too many, a lot of communication will be required to perform work. The purpose of service boundaries is to capture a domain or workflow.
Guidelines to find the appropriate boundaries:
- purpose - a domain, one significant behaviour on behalf of the overall application
- transactions - often the entities that need to cooperate in a transaction show a good service boundary
- choreography - if excellent domain isolation requires extensive communication, consider merging services back into a larger service to avoid the communication overhead

Microservices architecture tries to avoid all kinds of coupling - including shared schemas and databases used as integration points.

Once a team has built several microservices, they realize that each has common elements that benefit from similarity; these can be moved into a sidecar. The shared sidecar can be either owned by individual teams or by a shared infrastructure team. Once teams know that each service includes a common sidecar, they can build a _service mesh_ - allowing unified control across infrastructure concerns like logging and monitoring.

2 styles of user interfaces:
- monolithic user interface - a single UI that calls through the API layer to satisfy user requests
- micro-frontends - each service emits the UI for that service, and the frontend coordinates the emitted UI components

Microservices architectures typically utilize _protocol-aware heterogeneous interoperability_:
- protocol-aware - each service should know how to call other services
- heterogeneous - each service may be written in a different technology stack; microservices fully support polyglot environments
- interoperability - describes services calling one another; while architects in microservices try to discourage transactional calls, services commonly call other services via the network to collaborate

For asynchronous communication, architects often use events and messages (internally utilizing an event-driven architecture). The broker and mediator patterns manifest as choreography and orchestration:
- choreography - no central coordinator exists in this architecture
- orchestration - coordinating calls across several services

Building transactions across service boundaries violates the core decoupling principle of the microservices architecture. DON'T.

> Don't do transactions in microservices - fix granularity instead.

Exceptions always exist (e.g. 2 different parts need vastly different architecture characteristics -> different boundaries); in such situations, patterns exist to handle transaction orchestration (with serious trade-offs). SAGA - the mediator calls each part of the transaction, records success/failure, and coordinates results. In case of an error, the mediator must ensure that no part of the transaction succeeds if one part fails (e.g. by sending requests to undo the completed parts - usually very complex). Typically implemented by keeping each request in a `pending` state until the mediator indicates success, as sketched below.

> A few transactions across services is sometimes necessary; if it is the dominant feature of the architecture, mistakes were made!
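A minimal sketch of an orchestrated SAGA: the mediator runs each step, records what has completed, and issues compensating "undo" calls in reverse order when a later step fails (the step functions are illustrative):

```python
def run_saga(steps):
    # The mediator executes each (action, compensation) pair in order,
    # recording the compensations of completed steps.
    completed = []
    for action, compensation in steps:
        try:
            action()
            completed.append(compensation)
        except Exception:
            # A step failed: undo everything completed so far, in
            # reverse order, so no part of the transaction survives.
            for compensate in reversed(completed):
                compensate()
            return "rolled back"
    return "committed"

def charge_payment():
    raise RuntimeError("payment failed")

print(run_saga([
    (lambda: print("reserve stock (pending)"), lambda: print("release stock")),
    (charge_payment, lambda: print("refund payment")),
]))  # prints: reserve stock (pending), release stock, rolled back
```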
Performance is often an issue in microservices - many network calls, each with high overhead. Many patterns exist to increase performance (data caching and replication). Nevertheless, some of the most scalable systems yet have utilized this style to great success, thanks to its scalability, elasticity, and evolutionary nature.

Additional references on microservices:
- Building Microservices
- Microservices vs. Service-Oriented Architecture
- Microservices AntiPatterns

## Chapter 18: Choosing the Appropriate Architecture Style

Choosing an architecture style represents the culmination of analysis and thought about trade-offs for architecture characteristics, domain considerations, strategic goals, and a host of other things.

Preferred architecture styles shift over time, driven by:
- observations from the past - rely on experience and observations
- changes in the ecosystem - constant change is a reliable feature of software development
- new capabilities - architects must keep an eye open not only for new tools but for new paradigms
- acceleration - new tools create new engineering practices, which lead to new designs and capabilities
- domain changes - the business continues to evolve
- technology changes - as technology evolves, organizations have to keep up with at least some of these changes
- external factors - external factors may force a migration to another option

When choosing an architectural style, an architect must take into account all the various factors that contribute to the structure for the domain design. Architects should go into the decision comfortable with the following things:
- the domain - a good general understanding of the major aspects of the domain
- architecture characteristics that impact structure - the architect must discover and elucidate the architecture characteristics
- data architecture - architects and DBAs must collaborate on database, schema, and other DB-related concerns
- organizational factors - external factors may influence the design - cost, the company's plans, ...
- knowledge of process, teams, and operational concerns - the software development process and the interaction with operations and QA influence the design
- domain/architecture isomorphism - some problem domains match the topology of an architecture

Several determinations:
- monolith vs distributed
- where should data live
- synchronous or asynchronous communication between services

General tip:

> Use synchronous by default, asynchronous when necessary

## Chapter 19: Architecture Decisions

Making architecture decisions involves gathering enough relevant information, justifying the decision, documenting the decision, and effectively communicating the decision to the right stakeholders.

Decision anti-patterns:
- covering your assets - occurs when an architect avoids/defers making architecture decisions out of fear of making the wrong choice; 2 ways to overcome:
  - wait until you have enough information to justify and validate your decision (but waiting too long holds up development teams)
  - continually collaborate with development teams to ensure that the decision can be implemented as expected, and respond quickly to change
- groundhog day - when people don't know why a decision was made, so it keeps getting discussed over and over; the architect failed to provide a justification (technical and business) for the decision
- email-driven architecture - where people lose, forget, or don't even know an architecture decision has been made and therefore cannot implement it; notify impacted people directly in order to avoid this anti-pattern

Architecturally significant decisions are those that affect any of:
- the structure - impacts the patterns/styles of architecture being used
- nonfunctional characteristics - architecture characteristics (performance, scalability, ...)
- dependencies - coupling points between components/services within the system
- interfaces - how services and components are accessed and orchestrated
- construction techniques - platforms, frameworks, tools, processes

Architecture Decision Records (ADRs) - a short text file describing a specific architecture decision. 5 main sections:
- title - numbered sequentially, containing a short phrase describing the architecture decision
- status - one of: proposed (must be approved by a higher-level decision maker), accepted (approved and ready for implementation), superseded (the decision was changed and superseded by another ADR)
- context - what situation forces me to make this decision; this section also provides a way to document the architecture (clear and concise)
- decision - the architecture decision, along with a full justification; it is advised to use an affirmative voice: "we will do", "we will use", ... This section allows an architect to place more emphasis on _why_ rather than _how_. Understanding why a decision was made is far more important than understanding how something works.
- consequences - the overall impact of an architecture decision; this section forces the architect to think about whether those impacts outweigh the benefits of the decision. Another good use is to document the trade-off analysis.
- [additional] compliance - how the architecture decision will be measured and governed from a compliance perspective
- [additional] notes - various metadata - author, approval date, approved by, superseded date, last modified date, ...

Authors' recommendation: store ADRs in a wiki, rather than in Git. ADRs can be used as an effective means to document a software architecture (a skeleton is sketched below).
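A hypothetical ADR skeleton following the five main sections above; the number, title, and decision are invented for illustration:

```markdown
# 42. Use asynchronous messaging between order and payment services

## Status
Accepted

## Context
What situation forces this decision? (Also serves to document the
architecture, clearly and concisely.)

## Decision
We will use asynchronous messaging between the order and payment
services, because... (emphasis on *why* rather than *how*)

## Consequences
The overall impact of the decision; do the benefits outweigh the
impacts? The trade-off analysis can be documented here.
```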
Irrational Artifact Attachment - the proportional relationship between a person's attachment to some artifact and how long it took to produce. If you spend a lot of time on something, you may have an irrational attachment to that artifact (proportional to the time invested). Use an Agile approach in order to avoid this anti-pattern - create just-in-time artifacts, use simple tools to create diagrams.

Baseline features of a diagramming tool:

- layers - used to link a group of items together logically to enable hiding/showing individual layers. An architect can build a diagram where they can hide overwhelming details or incrementally build pictures for presentations
- stencils/templates - allow you to build up a library of common visual components (basic shapes with a special meaning, e.g. a microservice stencil)
- magnets - assistance in drawing lines

Diagram Guidelines:

- Titles - all elements of the diagram should have a title, unless they are well known to the audience
- Lines - should be thick enough to be seen; if lines indicate information flow, use arrows
  - solid lines = synchronous communication
  - dotted lines = asynchronous communication
- Shapes - each architect tends to make their own standard set of shapes; hint: use 3D boxes to indicate deployable artifacts and rectangles to indicate containership
- Labels - label each item in a diagram, especially if there is a chance of ambiguity for the readers
- Color - use colors when it helps to distinguish one artifact from the other
- Keys - if shapes are ambiguous, include a key on the diagram clearly indicating what each shape represents

Book recommendation: Presentation Patterns

When preparing a presentation - use a different type of transition when changing topics, use the same transition within a topic.

When presenting, the presenter has 2 presentation channels: verbal and visual. By placing too much text on the slides and then saying the same words, the presenter is overloading one information channel and starving the other.

Using animations and transitions in conjunction with incremental builds (reveal information gradually) allows the presenter to make more compelling, entertaining presentations.

Info-decks - slide decks that are not meant to be projected but rather summarize information graphically, essentially using a presentation tool as a desktop publishing machine. They contain all the information, are meant to be standalone, with no need for a presenter.

Invisibility - a pattern where the presenter inserts a blank slide within a presentation to refocus attention solely on the speaker (turns off the visual channel).

## Chapter 22: Making Teams Effective

A software architect is also responsible for guiding the development team through the implementation of the architecture. A software architect should create and communicate constraints, or the box, in which developers can implement the architecture. Tight boundaries = frustration, loose boundaries = confusion, appropriate boundaries = effective teams.
3 basic types of architect personalities:

- a control freak:
  - controls every detailed aspect of the software development process
  - makes too fine-grained and too low-level decisions
  - may restrict the development team to a specific technology, library, naming convention, or class design
  - steals the art of programming away from the developers
- an armchair architect:
  - hasn't coded in a very long time and does not take the implementation details into account
  - creates loose boundaries; in this scenario, development teams end up taking on the role of architect, doing the work an architect is supposed to be doing
  - in order to avoid such behaviour, an architect should be involved in the technology being used on the project
- an effective architect:
  - produces the appropriate constraints and boundaries, ensures that the team members are working well together and have the right level of guidance on the team
  - requires working closely and collaborating with the team, and gaining the respect of the team as well

Elastic Leadership - https://www.elasticleadership.com -- knowing how much control to exert on a given development team, factors to determine how many teams a software architect can manage at once:

- team familiarity - the better team members know each other, the less control is needed because team members start to become self-organizing; the newer the team members, the more control is needed to help facilitate collaboration among team members and reduce cliques within the team
- team size - the larger the team, the more control is needed; the smaller the team, the less control is needed
- overall experience - teams with more junior developers require more control and mentoring, whereas teams with more senior developers require less control
- project complexity - highly complex projects require the architect to be more available to the team and to assist with issues that arise, hence more control is needed on the team
- project duration - the shorter the duration, the less control is needed; the longer the project, the more control is needed

3 factors when considering the most effective team size:

- process loss - (Brooks' law) the more people you add to a project, the more time the project will take, example: unable to parallelize work, merge conflicts
- pluralistic ignorance - when everyone agrees to a norm because they think they are missing something obvious; rather than speaking up, a person chooses to follow the group (similar to "The Emperor's New Clothes" -- the king is naked), an architect should observe the body language of all team members and ask each person what they think about the proposed solution
- diffusion of responsibility - as team size increases, it has a negative impact on communication

An effective architect not only helps guide the development team through the implementation of the architecture, but also ensures that the team is healthy, happy, and working together to achieve a common goal.

Checklists work and provide an excellent vehicle for making sure everything is covered and addressed. The key to making teams effective is knowing when to leverage checklists and when not to.
Most effective checklists:

- code completion checklist - if everything in the checklist is completed, then the developer can claim they are actually done with the code
- unit and functional testing checklist - contains some of the more unusual and edge-case tests that software developers tend to forget to test
- software release checklist - releasing software is perhaps one of the most error-prone aspects of the software development life cycle; it helps avoid failed builds and deployments, and it significantly reduces the amount of risk associated with releasing software

Many items from the checklists can be automated.

> Don't worry about stating the obvious in a checklist. It's the obvious stuff that's usually skipped or missed.

## Chapter 23: Negotiation and Leadership Skills

Negotiation is one of the most important skills a software architect can have. Effective software architects understand the politics of the organization, have strong negotiation and facilitation skills, and can overcome disagreements when they occur to create solutions that all stakeholders agree on.

"We must have zero downtime", "I need these features yesterday", ...:

> Leverage the use of grammar and buzzwords to better understand the situation

Enter the negotiation with as many arguments as possible:

> Gather as much information as possible _before_ entering into a negotiation

Save this negotiation tactic for last:

> When all else fails, state things in terms of cost and time

Does the entire system require 99.999% availability or just some parts?:

> Leverage the "divide and conquer" rule to qualify demands or requirements

Demonstrate your point with a real-life example:

> Always remember that demonstration defeats discussion

> Avoid being too argumentative or letting things get too personal in a negotiation -- calm leadership combined with
> clear and concise reasoning will always win a negotiation

Ivory Tower architecture anti-pattern - ivory tower architects are ones who simply dictate from on high, telling development teams what to do without regard to their opinions or concerns. This usually leads to a loss of respect for the architect and an eventual breakdown of the team dynamics.

> When convincing developers to adopt an architecture decision or to do a specific task, provide a justification rather
> than "dictating from on high"

By providing a reason why something needs to be done, developers are more likely to agree with the request. Most of the time, once a person hears something they disagree with, they stop listening. By stating the reason first, the architect makes sure that the justification will be heard.

> If a developer disagrees with a decision, have them arrive at the solution on their own

Win-win situation: the developer either fails trying, and the architect automatically gets buy-in for the architect's decision, or the developer finds a better way to address the concerns.

Accidental complexity - we have made a problem hard; architects sometimes do this to prove their worth when things seem too simple, or to guarantee that they are always kept in the loop on discussions and decisions. Introducing accidental complexity into something that is not complex is one of the best ways to become an ineffective leader as an architect. An effective way of avoiding accidental complexity is what we call the 4 C's of architecture:

- communication
- collaboration
- clarity
- conciseness

Be pragmatic, yet visionary.

Visionary - Thinking about or planning the future with imagination or wisdom.
Pragmatic - Dealing with things sensibly and realistically in a way that is based on practical rather than theoretical considerations.

Bad software architects leverage their title to get people to do what they want them to do. Effective software architects get people to do things not by leveraging their title as architect, but by leading through example. Lead by example, not by title.

To lead a team and become an effective leader, a software architect should try to become the go-to person on the team - the person developers go to with their questions and problems. Another technique to start gaining respect as a leader and become the go-to person on the team is to host periodic brown-bag lunches to talk about a specific technique or technology.

Too many meetings? Ask for the meeting agenda ahead of time to help determine whether you are really needed at the meeting or not. Meetings should be either first thing in the morning, right after lunch, or toward the end of the day, but not in the middle of the day when most developers experience flow state.

> The most important single ingredient in the formula of success is knowing how to get along with people ~ Theodore
> Roosevelt

## Chapter 24: Developing a Career Path

An architect must continue to learn throughout their career. Technology breadth is more important to architects than depth.

The 20-Minute Rule - devote at least 20 minutes a day to your career as an architect by learning something new or diving deeper into a specific topic. Spend a minimum of 20 minutes Googling some unfamiliar buzzwords.

Technology Radar: https://www.thoughtworks.com/radar

You can create your own personal technology radar. It helps to formalize thinking about technology and balance opposing decision criteria. Architects should choose some technologies and/or skills that are widely in demand and track that demand. But they might also want to try some technology gambits, like open source or mobile development.

Architects can utilize social media to enhance their technical breadth. Using media like Twitter professionally, architects should find technologists whose advice they respect. This allows them to build a network of new, interesting technologies to assess and to keep up with the rapid changes in the technology world.

## Self-Assessment Questions

[Chapter 1: Introduction](#chapter-1-introduction)

1. What are the 4 dimensions that define software architecture?
   Knowledge of the architecture structure, architecture characteristics, architecture decisions, and design principles.
2. What is the difference between an architecture decision and a design principle?
   Decisions: what is and what is not allowed, rules for how a system should be constructed. Design principles: guidelines for constructing systems.
3. List the eight core expectations of a software architect.
   Make architecture decisions. Continually analyze the architecture. Keep current with the latest trends. Ensure compliance with decisions. Diverse exposure and experience. Have business domain knowledge. Possess interpersonal skills. Understand and navigate politics.
4. What is the First Law of Software Architecture?
   Everything in software architecture is a trade-off.

[Chapter 2: Architectural thinking](#chapter-2-architectural-thinking)

1. Describe the traditional approach of architecture versus development and explain why that approach no longer works.
   In a traditional model the architect is disconnected from the development teams, and as such the architecture rarely provides what it originally set out to do.
   The architect defines architecture characteristics, selects architecture patterns and styles, then these artifacts are handed off to the development teams. Boundaries between architects and developers must be broken down. Unlike the old-school waterfall approaches to static and rigid software architecture, the architecture of today's systems changes and evolves every iteration. Tight collaboration is essential for success.
2. List the three levels of knowledge in the knowledge triangle and provide an example of each.
   Stuff you know: Python. Stuff you know you don't know: Deep Learning. Stuff you don't know you don't know: 🤷‍
3. Why is it more important for an architect to focus on technical breadth rather than technical depth?
   Architects must make decisions that match capabilities to technical constraints; a broad understanding of a wide variety of solutions is valuable.
4. What are some of the ways of maintaining your technical depth and remaining hands-on as an architect?
   - do frequent proof-of-concepts
   - whenever possible, write the best production-quality code (even when doing POCs) -- POC code often remains in the repository and becomes the reference or guiding example
   - tackle technical debt stories or architecture stories, freeing the development team up to work on the critical functional user stories
   - work on bug fixes
   - create simple command-line tools and analyzers to help the development team with their day-to-day tasks
   - do code reviews frequently

[Chapter 3: Modularity](#chapter-3-modularity)

1. What is meant by the term _connascence_?
   Two components are connascent if a change in one would require the other to be modified in order to maintain the overall correctness of the system. Connascence allows us to go beyond the binary of "coupled" and "not coupled", serving as a tool to measure coupling and describe how bad it is under different levels and kinds.
2. What is the difference between static and dynamic connascence?
   Static connascence refers to source-code-level coupling - name (multiple entities must agree on the name), type (multiple entities must agree on the type), meaning (multiple entities must agree on the meaning of particular values), position (multiple entities must agree on the order of the values), algorithm (multiple entities must agree on a particular algorithm). Dynamic connascence analyzes calls at runtime - execution (order of execution), timing (timing of the execution of multiple components), values (several values relate to one another and must change together), identity (multiple components must reference the same entity).
3. What does the connascence of type mean? Is it static or dynamic connascence?
   [STATIC] Multiple components must agree on the type of an entity.
4. What is the strongest form of connascence?
   Identity. Multiple components must reference the same entity. For example, when 2 independent components must share and update a common data source.
5. What is the weakest form of connascence?
   Name. Multiple components must agree on the name.
6. Which is preferred within a code base -- static or dynamic connascence?
   Static. Architects have a harder time determining dynamic connascence because we lack tools to analyze runtime calls as effectively as we can analyze the call graph.

[Chapter 4: Architecture Characteristics Defined](#chapter-4-architecture-characteristics-defined)

1. What three criteria must an attribute meet to be considered an architecture characteristic?
   - specifies a non-domain design consideration
   - influences some structural aspect of the domain
   - is critical or important to application success
2. What is the difference between an implicit characteristic and an explicit one? Provide an example of each.
   Implicit - rarely appears in requirements, yet is necessary for project success; domain knowledge is required to uncover such characteristics. Explicit - a characteristic listed in the requirements.
3. Provide an example of an operational characteristic.
   Availability, Continuity, Performance, Reliability, Recoverability, Scalability, ...
4. Provide an example of a structural characteristic.
   Configurability, Extensibility, Maintainability, ...
5. Provide an example of a cross-cutting characteristic.
   Accessibility, Authentication, Authorization, Legal, Security, Privacy, ...
6. Which architecture characteristic is more important to strive for -- availability or performance?
   The ultimate answer to architectural questions: _it depends..._

[Chapter 5: Identifying Architectural Characteristics](#chapter-5-identifying-architectural-characteristics)

1. Give a reason why it is a good practice to limit the number of characteristics an architecture should support.
   Over-specifying architecture characteristics may kill the project. Example: the Vasa - a Swedish warship that was supposed to be magnificent, but turned out to be too heavy and too complicated. Keep the design simple.
2. True or false: most architecture characteristics come from business requirements and user stories.
   True.
3. If a business stakeholder states that time-to-market is the most important business concern, which architecture characteristics would the architecture need to support?
   Agility, testability, deployability.
4. What is the difference between scalability and elasticity?
   Scalability - the ability to handle a large number of concurrent users without serious performance degradation. Elasticity - the ability to handle bursts of requests.
5. You find out that your company is about to undergo several major acquisitions to significantly increase its customer base. Which architectural characteristics should you be worried about?
   Interoperability, scalability, adaptability, extensibility.

[Chapter 6: Measuring and Governing Architecture Characteristics](#chapter-6-measuring-and-governing-architecture-characteristics)

1. Why is cyclomatic complexity such an important metric to analyze for architecture?
   Overly complex code represents a code smell - it harms virtually every one of the desirable characteristics.
2. What is an architecture fitness function? How can they be used to analyze an architecture?
   Any mechanism that provides an objective integrity assessment of some architecture characteristic or combination of architecture characteristics. Many tools may be used to implement fitness functions: metrics, monitors, unit tests, chaos engineering, ...
3. Provide an example of an architecture fitness function to measure the scalability of an architecture.
   Write automated scalability tests and compare the results.
4. What is the most important criterion for an architecture characteristic to allow architects and developers to create fitness functions?
   Architects must ensure that developers understand the purpose of the fitness function before imposing it on them.

[Chapter 7: Scope of Architecture Characteristics](#chapter-7-scope-of-architecture-characteristics)

1. What is an architectural quantum, and why is it important to architecture?
   The architectural quantum is the smallest possible item that needs to be deployed in order to run an application.
2. Assume a system consisting of a single user interface with four independently deployed services, each containing its own separate database. Would this system have a single quantum or four quanta? Why?
   4, because each service can be deployed separately.
3. Assume a system with an administration portion managing static reference data (such as the product catalog, and warehouse information) and a customer-facing portion managing the placement of orders. How many quanta should this system be and why? If you envision multiple quanta, could the admin quantum and customer-facing quantum share a database? If so, in which quantum would the database need to reside?
   2 quanta - ordering and warehouse management, with separate databases.

[Chapter 8: Component-Based Thinking](#chapter-8-component-based-thinking)

1. We define the term component as a building block of an application - something the application does. A component usually consists of a group of classes or source files. How are components typically manifested within an application or service?
   Components - the physical manifestation of a module. Components offer a language-specific mechanism to group artifacts together, often nesting them to create stratification. Components also appear as subsystems or layers in architecture, and as the deployable unit of work for many event processors.
2. What is the difference between technical partitioning and domain partitioning? Provide an example of each.
   Technical partitioning - organizing architecture based on technical capabilities (presentation, business, service, persistence). Domain partitioning - a modeling technique for decomposing complex systems; in DDD the architect identifies domains that are independent and decoupled from each other. The microservices architecture is based on this philosophy.
3. What is the advantage of domain partitioning?
   Better reflects the kinds of changes that most often occur on projects.
4. Under what circumstances would technical partitioning be a better choice over domain partitioning?
   Separation based on technical partitioning enables developers to find certain categories of the code base quickly, as it is organized by capabilities.
5. What is the entity trap? Why is it not a good approach for component identification?
   Arises when the architect incorrectly identifies the database relationships as workflows in the application, a correspondence that rarely manifests in the real world. This anti-pattern indicates a lack of thought about the actual workflows of the application. Components created with the entity trap tend to be too coarse-grained.

[Chapter 9: Foundations](#chapter-9-foundations)

1. List the eight fallacies of distributed computing.
   Latency is Zero, Bandwidth is Infinite, The Network is Reliable, The Network is Secure, The Topology Never Changes, There is Only One Administrator, Transport Cost is Zero, The Network is Homogeneous.
2. Name three challenges that distributed architectures have that monolithic architectures don't.
   Debugging a distributed architecture, distributed transactions, contract maintenance and versioning.
3. What is stamp coupling?
   Requesting/receiving too much data when only a small subset of it is needed -- 2000 req x 10kB vs 2000 req x 100kB.
4. What are some ways of addressing stamp coupling?
- create private RESTful API endpoints - use field selectors in the contract - use GraphQL - use internal messaging endpoints ================================================ FILE: books/go/ch01/Makefile ================================================ # Set default target, when 'make' executed, runs 'build' by default: .DEFAULT_GOAL := build fmt: go fmt ./... # Keep 'make' from getting confused with directories, in this case with directory 'fmt' (if it is ever created): .PHONY: fmt # Before running 'lint', run 'fmt' lint: fmt golint ./... .PHONY: lint vet: fmt go vet ./... .PHONY: vet build: vet go build hello.go .PHONY: build ================================================ FILE: books/go/ch01/hello.go ================================================ package main import "fmt" func main() { fmt.Println("Hello, world!") } ================================================ FILE: books/go/ch02/const.go ================================================ package main import "fmt" const x int64 = 10 const ( idKey = "id" nameKey = "name" ) const z = 20 * 20 func main() { const y = "hello" fmt.Println(x) fmt.Println(y) //x = x + 1 // Error //y = "bye" // Error fmt.Println(x) fmt.Println(y) } ================================================ FILE: books/go/ch02/unicode.go ================================================ package main import "fmt" func main() { ęąćśż := "hello" fmt.Println(ęąćśż) } ================================================ FILE: books/go/ch03/types.go ================================================ package main import "fmt" func main() { var x [3]int fmt.Println(x) var y = [12]int{1, 5: 4} fmt.Println(y) var z = [...]int{12, 20, 30} fmt.Println(z) var p = []int{12, 20, 30} fmt.Println(p) var v []int fmt.Println(v == nil) fmt.Println(len(v)) v = append(v, 10, 20) fmt.Println(v) v = append(v, p...) 
fmt.Println(v) fmt.Println(cap(v)) r := make([]int, 5) fmt.Println(r) r = make([]int, 0, 20) r = append(r, 10, 20) fmt.Println(r) s := "Hello 😇" fmt.Println(s[6:7]) fmt.Println(s[6:10]) // 4 bytes for emoji teams := map[string][]string{ "Orcas": {"Fred", "Ralph"}, "Lions": {"Sarah", "Peter"}, } fmt.Println(teams) team, ok := teams["Kittens"] fmt.Println(team, ok) set := map[int]bool{} vals := []int{1, 2, 3, 4, 5, 6, 7, 4, 3, 2, 3, 4, 3} for _, v := range vals { set[v] = true } fmt.Println(len(set), len(vals)) if set[1] { fmt.Println("1 is in the set") } type person struct { name string age int pet string } julia := person{ "Julia", 30, "cat", } beth := person{ name: "Beth", } fmt.Println(julia, beth) var bob struct { name string age int pet string } bob.name = "Bob" fmt.Println(bob) } ================================================ FILE: books/go/ch04/case.go ================================================ package main import "fmt" func main() { words := []string{"a", "cow", "smile", "gopher"} for _, word := range words { switch size := len(word); size { case 1, 2, 3, 4: fmt.Println(word, "is a short word!") case 5: wordLen := len(word) fmt.Println(word, "is the exactly the right length:", wordLen) case 6, 7, 8, 9: default: fmt.Println(word, "is a long word!") } } } ================================================ FILE: books/go/ch04/for.go ================================================ package main import ( "fmt" ) func main() { completeFor() conditionOnlyFor() infiniteFor() forRange() labelingStatements() } func completeFor() { for i := 0; i < 10; i++ { fmt.Println(i) } } func conditionOnlyFor() { i := 1 for i < 100 { fmt.Println(i) i = i * 2 } } func infiniteFor() { for { fmt.Println("Hello") break } } func forRange() { evenVals := []int{2, 4, 6, 8, 10, 12} for i, v := range evenVals { fmt.Println(i, v) } for _, v := range evenVals { fmt.Println(v) } for _, v := range evenVals { fmt.Println(v) } uniqueNames := map[string]bool{"Fred": true, "Paul": true, "Wilma": true} for k := range uniqueNames { fmt.Println(k) } } func labelingStatements() { samples := []string{"hello", "apple_π!"} outer: for _, sample := range samples { for i, r := range sample { fmt.Println(i, r, string(r)) if r == 'l' { continue outer } } fmt.Println() } } ================================================ FILE: books/go/ch04/if.go ================================================ package main import ( "fmt" "math/rand" ) func main() { if n := rand.Intn(10); n == 10 { fmt.Println("That's too low") } else if n > 5 { fmt.Println("That's too big:", n) } else { fmt.Println("That's a good number:", n) } } ================================================ FILE: books/go/ch05/anonymous.go ================================================ package main import "fmt" func main() { for i := 0; i < 5; i++ { func(j int) { fmt.Println("printing", j, "from inside of an anonymous function") }(i) } } ================================================ FILE: books/go/ch05/deferExample.go ================================================ package main import ( "io" "log" "os" ) func getFile(name string) (*os.File, func(), error) { f, err := os.Open(name) if err != nil { return nil, nil, err } return f, func() { f.Close() }, nil } func main() { if len(os.Args) < 2 { log.Fatal("no file specified") } f, closer, err := getFile(os.Args[1]) if err != nil { log.Fatal(err) } defer closer() data := make([]byte, 2048) for { count, err := f.Read(data) os.Stdout.Write(data[:count]) if err != nil { if err != io.EOF { log.Fatal(err) } break } } } 
================================================ FILE: books/go/ch05/functionAsParam.go ================================================ package main import ( "fmt" "sort" ) type Person struct { FirstName string LastName string Age int } func main() { people := []Person{ {"Pat", "Patterson", 34}, {"Tracy", "Bobbert", 23}, {"Fred", "Fredson", 18}, } sort.Slice(people, func(i int, j int) bool { return people[i].LastName < people[j].LastName }) fmt.Println(people) } ================================================ FILE: books/go/ch05/functions.go ================================================ package main import ( "errors" "fmt" ) func main() { result := Div(5, 2) fmt.Println(result) MyFunc(MyFuncOpts{ LastName: "Smith", Age: 10, }) fmt.Println(addTo(10, 1, 2, 3, 4, 5)) fmt.Println(addTo(10, []int{1, 2, 3, 4, 5}...)) result, remainder, err := divAndRemainder(5, 2) if err != nil { fmt.Println(err) } fmt.Println(result, remainder) } func Div(numerator int, denominator int) int { if denominator == 0 { return 0 } return numerator / denominator } type MyFuncOpts struct { FirstName string LastName string Age int } func MyFunc(opts MyFuncOpts) int { return opts.Age } func addTo(base int, vals ...int) []int { out := make([]int, 0, len(vals)) for _, v := range vals { out = append(out, base+v) } return out } func divAndRemainder(numerator int, denominator int) (result int, remainder int, err error) { if denominator == 0 { err = errors.New("cannot divide by zero") return result, remainder, err } result, remainder, err = numerator/denominator, numerator%denominator, nil return result, remainder, err } ================================================ FILE: books/go/ch05/functionsAreValues.go ================================================ package main import "fmt" func main() { var opMap = map[string]func(int, int) int{ "+": add, "-": sub, "*": mul, "/": div, } fmt.Println(opMap["+"](10, 20)) } func add(i int, j int) int { return i + j } func sub(i int, j int) int { return i - j } func mul(i int, j int) int { return i * j } func div(i int, j int) int { return i / j } ================================================ FILE: books/go/ch05/returnFunction.go ================================================ package main import "fmt" func makeMult(base int) func(int) int { return func(factor int) int { return base * factor } } func main() { twoBase := makeMult(2) threeBase := makeMult(3) for i := 0; i < 3; i++ { fmt.Println(twoBase(i), threeBase(i)) } } ================================================ FILE: books/go/ch06/pointers.go ================================================ package main import "fmt" func failedUpdate(px *int) { x2 := 20 px = &x2 } func update(px *int) { *px = 20 } func main() { y := "hello" fmt.Println(y, &y, *&y) x := 10 failedUpdate(&x) fmt.Println(x) update(&x) fmt.Println(x) } ================================================ FILE: books/go/ch07/counter.go ================================================ package main import ( "fmt" "time" ) type Counter struct { total int lastUpdated time.Time } func (c *Counter) Increment() { c.total++ c.lastUpdated = time.Now() } func (c Counter) String() string { return fmt.Sprintf("total: %d, last updated %v", c.total, c.lastUpdated) } func updateWrong(c Counter) { c.Increment() fmt.Println("in updateWrong:", c.String()) } func updateRight(c *Counter) { c.Increment() fmt.Println("in updateRight:", c.String()) } func main() { var c Counter fmt.Println(c.String()) c.Increment() fmt.Println(c.String()) updateWrong(c) fmt.Println("in main:", 
c.String()) updateRight(&c) fmt.Println("in main:", c.String()) } ================================================ FILE: books/go/ch07/dependencyInjection.go ================================================ package main import ( "errors" "fmt" "net/http" ) func LogOutput(message string) { fmt.Println(message) } type SimpleDataStore struct { userData map[string]string } func (sds SimpleDataStore) UserNameForId(userID string) (string, bool) { name, ok := sds.userData[userID] return name, ok } func NewSimpleDataStore() SimpleDataStore { return SimpleDataStore{ userData: map[string]string{ "1": "Fred", "2": "Mary", "3": "Pat", }, } } type DataStore interface { UserNameForId(userID string) (string, bool) } type Logger interface { Log(message string) } type LoggerAdapter func(message string) func (lg LoggerAdapter) Log(message string) { lg(message) } type SimpleLogic struct { l Logger ds DataStore } func (sl SimpleLogic) SayHello(userID string) (string, error) { sl.l.Log("in SayHello for " + userID) name, ok := sl.ds.UserNameForId(userID) if !ok { return "", errors.New("unknown user") } return "Hello, " + name, nil } func (sl SimpleLogic) SayGoodbye(userID string) (string, error) { sl.l.Log("in SayGoodbye for " + userID) name, ok := sl.ds.UserNameForId(userID) if !ok { return "", errors.New("unknown user") } return "Goodbye, " + name, nil } func NewSimpleLogic(l Logger, ds DataStore) SimpleLogic { return SimpleLogic{ l: l, ds: ds, } } type MyLogic interface { SayHello(userID string) (string, error) } type Controller struct { l Logger logic MyLogic } func (c Controller) SayHello(w http.ResponseWriter, r *http.Request) { c.l.Log("In SayHello") userID := r.URL.Query().Get("user_id") message, err := c.logic.SayHello(userID) if err != nil { w.WriteHeader(http.StatusBadRequest) w.Write([]byte(err.Error())) return } w.Write([]byte(message)) } func NewController(l Logger, logic MyLogic) Controller { return Controller{ l: l, logic: logic, } } func main() { l := LoggerAdapter(LogOutput) ds := NewSimpleDataStore() logic := NewSimpleLogic(l, ds) c := NewController(l, logic) http.HandleFunc("/hello", c.SayHello) http.ListenAndServe(":8080", nil) } ================================================ FILE: books/go/ch07/embedding.go ================================================ package main import "fmt" type Employee struct { Name string ID string } func (e Employee) Description() string { return fmt.Sprintf("%s (%s)", e.Name, e.ID) } type Manager struct { Employee Reports []Employee } func main() { m := Manager{ Employee: Employee{ Name: "Bob Bobson", ID: "12345", }, Reports: []Employee{}, } fmt.Println(m.ID) fmt.Println(m.Description()) } ================================================ FILE: books/go/ch07/intTree.go ================================================ package main import "log" type IntTree struct { val int left, right *IntTree } func (it *IntTree) Insert(val int) *IntTree { if it == nil { return &IntTree{val: val} } if val < it.val { it.left = it.left.Insert(val) } else if val > it.val { it.right = it.right.Insert(val) } return it } func (it *IntTree) Contains(val int) bool { switch { case it == nil: return false case val < it.val: return it.left.Contains(val) case val > it.val: return it.right.Contains(val) default: return true } } func main() { var it *IntTree it = it.Insert(5) // calling methods on a nil receiver it = it.Insert(3) it = it.Insert(10) it = it.Insert(2) log.Println(it.Contains(2)) log.Println(it.Contains(12)) } ================================================ FILE: 
books/go/ch07/interfaces.go ================================================ package main import "fmt" type LogicProvider struct{} func (lp LogicProvider) Process(data string) string { return data } type Logic interface { Process(data string) string } type Client struct { L Logic } func (c Client) Program() { data := "whatever" c.L.Process(data) } func main() { c := Client{L: LogicProvider{}} c.Program() var i interface{} i = 1 i = "a" fmt.Println(i) } ================================================ FILE: books/go/ch07/iota.go ================================================ package main type MailCategory int const ( Uncategorized MailCategory = iota Personal Spam Social Ads ) ================================================ FILE: books/go/ch07/types.go ================================================ package main import "fmt" type Person struct { FirstName string LastName string Age int } type King Person // this is not an inheritance func (p Person) String() string { return fmt.Sprintf("%s %s, age %d", p.FirstName, p.LastName, p.Age) } type Score int type Converter func(string) Score type TeamScore map[string]Score func main() { p := Person{ FirstName: "Fred", LastName: "Fredson", Age: 52, } fmt.Println(p.String()) } ================================================ FILE: books/go/ch08/customErrors.go ================================================ package main type Status int const ( InvalidLogin Status = iota + 1 NotFound ) type StatusErr struct { Status Status Message string err error } func (se StatusErr) Error() string { return se.Message } func (se StatusErr) Unwrap() error { return se.err } ================================================ FILE: books/go/ch08/errors.go ================================================ package main import ( "errors" "fmt" "os" ) func calcRemainderAndMod(numerator, denominator int) (int, int, error) { if denominator == 0 { return 0, 0, errors.New("denominator is 0") } return numerator / denominator, numerator % denominator, nil } func main() { numerator := 20 denominator := 3 remainder, mod, err := calcRemainderAndMod(numerator, denominator) if err != nil { fmt.Println(err) os.Exit(1) } fmt.Println(remainder, mod) } ================================================ FILE: books/go/ch08/panic.go ================================================ package main func doPanic(msg string) { panic(msg) } func main() { doPanic("ERR") } ================================================ FILE: books/go/ch08/recover.go ================================================ package main import "fmt" func div60(i int) { defer func() { if v := recover(); v != nil { fmt.Println(v) } }() fmt.Println(60 / i) } func main() { for _, val := range []int{1, 2, 0, 6} { div60(val) } } ================================================ FILE: books/go/ch08/sentinel.go ================================================ package main import ( "archive/zip" "bytes" "fmt" ) type Sentinel string func (s Sentinel) Error() string { return string(s) } const ( ErrFoo = Sentinel("foo err") ErrBar = Sentinel("bar err") ) func main() { data := []byte("This is not a zip file") notZipFile := bytes.NewReader(data) _, err := zip.NewReader(notZipFile, int64(len(data))) if err == zip.ErrFormat { fmt.Println("Told you so") } } ================================================ FILE: books/go/ch08/wrappingErrors.go ================================================ package main import ( "errors" "fmt" "os" ) func fileChecker(name string) error { f, err := os.Open(name) if err != nil { return fmt.Errorf("in 
fileChecker: %w", err) // %w wraps the error //return fmt.Errorf("in fileChecker: %v", err) // %v does not wrap the error } f.Close() return nil } func main() { err := fileChecker("not_here.txt") if err != nil { fmt.Println(err) if wrappedErr := errors.Unwrap(err); wrappedErr != nil { fmt.Println(wrappedErr) } } } ================================================ FILE: books/go/ch09/formatter/formatter.go ================================================ package print import "fmt" func Format(num int) string { return fmt.Sprintf("The number is %d", num) } ================================================ FILE: books/go/ch09/main.go ================================================ package main import ( "./formatter" "./math" "fmt" ) func main() { num := math.Double(2) output := print.Format(num) fmt.Println(output) } ================================================ FILE: books/go/ch09/math/math.go ================================================ package math func Double(a int) int { return a * 2 } ================================================ FILE: books/go/ch10/deadlock.go ================================================ package main import "fmt" func main() { ch1 := make(chan int) ch2 := make(chan int) go func() { v := 1 ch1 <- v v2 := <-ch2 fmt.Println(v2) }() v := 2 ch2 <- v v2 := <-ch1 fmt.Println(v, v2) } ================================================ FILE: books/go/ch10/deadlockSolution.go ================================================ package main import "fmt" func main() { ch1 := make(chan int) ch2 := make(chan int) go func() { v := 1 ch1 <- v v2 := <-ch2 fmt.Println(v2) }() v := 2 var v2 int select { case ch2 <- v: case v2 = <-ch1: } fmt.Println(v, v2) } ================================================ FILE: books/go/ch10/goroutinesExample.go ================================================ package main func process(val int) int { return val * 2 } func runThingConcurrently(in <-chan int, out chan<- int) { go func() { for val := range in { result := process(val) out <- result } }() } ================================================ FILE: books/go/notes.md ================================================ [go back](https://github.com/pkardas/learning) # Learning Go: An Idiomatic Approach to Real-World Go Programming Book by Jon Bodner Code here: [click](.) 
- [Chapter 1: Setting Up Your Go Environment](#chapter-1-setting-up-your-go-environment)
- [Chapter 2: Primitive Types and Declarations](#chapter-2-primitive-types-and-declarations)
- [Chapter 3: Composite Types](#chapter-3-composite-types)
- [Chapter 4: Blocks, Shadows, and Control Structures](#chapter-4-blocks-shadows-and-control-structures)
- [Chapter 5: Functions](#chapter-5-functions)
- [Chapter 6: Pointers](#chapter-6-pointers)
- [Chapter 7: Types, Methods, and Interfaces](#chapter-7-types-methods-and-interfaces)
- [Chapter 8: Errors](#chapter-8-errors)
- [Chapter 9: Modules, Packages, and Imports](#chapter-9-modules-packages-and-imports)
- [Chapter 10: Concurrency in Go](#chapter-10-concurrency-in-go)
- [Chapter 11: The Standard Library](#chapter-11-the-standard-library)
- [Chapter 12: The Context](#chapter-12-the-context)
- [Chapter 13: Writing Tests](#chapter-13-writing-tests)
- [Chapter 14: Here There Be Dragons: Reflect, Unsafe, and Cgo](#chapter-14-here-there-be-dragons-reflect-unsafe-and-cgo)
- [Chapter 15: A Look at the Future: Generics in Go](#chapter-15-a-look-at-the-future-generics-in-go)

## Chapter 1: Setting Up Your Go Environment

Go is intended for building programs that last, programs that are modified by dozens of developers over dozens of years. Using Go correctly requires an understanding of how its features are intended to fit together. You can write code that looks like Java or Python, but you are going to be unhappy with the result.

> $ brew install go

Validate that your env is set up correctly: `go version`

There have been several changes in how Go developers organize their code and their dependencies. For modern Go development, the rule is simple: **you are free to organize your projects as you see fit**. However, Go still expects there to be a single workspace (default `$HOME/go`) for third-party Go tools installed via `go install`. You can use this default or set the `$GOPATH` env variable. Add the following lines to `.zshrc`:

```
export GOPATH=$HOME/go
export PATH=$PATH:$GOPATH/bin
```

Use `go run` when you want to treat a Go program like a script and run the source code immediately. `go run` builds the binary in a temporary directory, and then deletes the binary after your program finishes. Useful for testing out small programs or using Go like a scripting language.

Use `go build` to create a binary that is distributed for other people to use. Most of the time, this is what you want to do. Use the `-o` flag to give the binary a different name or location.

Go programs can also be built from source and installed into your Go workspace via `go install link@version`. Go developers don't rely on a centrally hosted service (Maven, PyPI, NPM, ...). Instead they share projects via their source code repositories. If you already installed a tool and want to update it to a newer version, rerun `go install` with the newer version specified after `@`.

Developers have historically wasted extraordinary amounts of time on format wars. Because Go defines a standard way of formatting code, Go developers avoid arguments over code styling. Go developers expect code to look a certain way and follow certain rules, and if your code does not, it sticks out. `go fmt` automatically reformats code to match the standard format.

Go requires a semicolon at the end of every statement. However, Go developers never put in the semicolons themselves; the Go compiler does it for them.
`go vet` detects things like: passing the wrong number of parameters to formatting methods, or assigning values to variables that are never used. Make `golint` and `go vet` part of your development process to avoid common bugs and non-idiomatic code.

An IDE is nice to use, but it is hard to automate. Modern software development relies on repeatable, automatable builds that can be run by anyone, anywhere, at any time. Go developers have adopted `make` as their solution.

You can use different Go versions:

```
go get golang.org/dl/go1.15.6
go1.15.6 download
go1.15.6 build
```

In order to update the Go version globally on your computer, use regular `brew` commands.

## Chapter 2: Primitive Types and Declarations

When trying to figure out what "best" means, there is one overriding principle: write your programs in a way that makes your intention clear.

LITERAL - in Go refers to writing out a number, character, or string.

- integer literals - sequences of numbers, normally base 10, but different prefixes are used to indicate other bases (`0b` binary, `0o` octal, `0x` hexadecimal). Put underscores in the middle of your literal, use them to improve readability, e.g. `120_000_000`
- floating point literals - they can also have an exponent specified with the letter `e` and a positive or negative number, e.g. `6.03e23`
- rune literals - characters surrounded by single quotes; in Go, `"` and `'` are _not_ interchangeable.
- string literals - two different ways to create:
  - interpreted string literal (") - zero or more rune literals
  - raw string literal (`) - can contain any literal character except a backquote
- strings in Go are immutable

Literals in Go are untyped - they can interact with any variable that is compatible with the literal.

BOOLEAN - `true` or `false`, variable definition defaults to `false`. Go doesn't allow truthiness - e.g. a positive integer cannot be treated as `true`.

INTEGER TYPES - 12 different types, more than other languages. 3 rules to follow:

1. If you are working with a binary format or network protocol that has an integer of a specific size or sign, use the corresponding integer type.
2. If you are writing a library function that should work with any integer type, write a pair of functions, one for `int64`, and the other for `uint64`. You can see this pattern in the std library (ParseInt/ParseUint, ...)
3. In all other cases, just use `int`.

FLOATING POINT - `float64` is the default type, the simplest option is to use this type. Don't worry about memory usage, unless you have used the profiler to determine it is a significant source of problems. A floating point number cannot represent a decimal value exactly. Do not use them to represent money or any other value that must have an exact decimal representation. Go stores floats using the IEEE 754 standard: 1 bit for the sign, 11 bits for the exponent, 52 bits for the mantissa.

Go doesn't allow automatic type promotion, as a language that values clarity of intent and readability. It turns out that the rules to properly convert one type to another can get complicated and produce unexpected results. You must use type conversion.

Variable declaration. Go has multiple ways of declaring a variable, because each declaration style communicates something about how the variable is used.

- `var x int = 10`
- `var x = 10`
- `var x int` - will default to 0
- `var z, y int = 10, 20`
- `var x, y = 10, "hello"`
- `var(...)` - declaration list
- `x := 10`

The most common declaration style within functions is `:=`. Outside a function, use declaration lists.
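A quick sketch of the declaration styles above, plus the explicit type conversion Go requires in mixed-type expressions (all names here are illustrative):

```go
package main

import "fmt"

// Outside a function, use a declaration list.
var (
	total     int          // defaults to the zero value, 0
	greeting        = "hello"
	threshold int64 = 10
)

func main() {
	x := 10             // the most common style inside functions
	var y int           // explicit zero value
	a, b := 20, "world" // multiple variables in one statement

	// No automatic type promotion: int and int64 must be converted explicitly.
	sum := int64(x) + threshold

	fmt.Println(total, greeting, threshold, x, y, a, b, sum)
}
```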
Sometimes you need to avoid `:=`:

1. When initializing a variable to its zero value, use `var x int`. This makes it clear that the zero is intended.
2. Because `:=` allows assigning to new and existing variables, it can be confusing whether a variable is new or existing. Declare all new variables with `var`, and then use the assignment operator (`=`) for both new and old variables.
3. When you need to convert a type during assignment, use `var x byte = 20`, not `x := byte(20)`.

Go allows Unicode characters and letters in variable names. However, don't use this feature.

Naming:

- use `camelCase`, even for constant vars
- use single letters for e.g. loops: `k`, `v` are common names for `key`, `value`; `i` for `integer`, ...
- do not put the type in the variable name
- use short names, they remove repetitive typing and force you to write smaller blocks of code (if you need a complete name to keep track of it, it is likely that your block of code does too much)

## Chapter 3: Composite Types

ARRAYS - rarely used in Go. All the elements in the array must be of the type that is specified.

```go
var x [3]int
var x = [3]int{10, 20, 30}
var x = [12]int{1, 5: 4} // Sparse array (most elements are set to the zero value)
var x = [...]int{12, 20, 30}
```

Arrays are rarely used in Go because they come with unusual limitations:

- the _size_ of the array is part of the _type_: `[3]int` has a different type than `[4]int`, and you can't use a variable to specify the size of an array
- you can't use a type conversion to convert arrays of different sizes to identical types

Don't use arrays unless you know the exact length you need ahead of time. Arrays in Go exist to provide backing stores for SLICES.

SLICES - slices remove the limitations of arrays. We can write a single function that processes slices of any size. We can also grow slices as needed. Slice definition:

```go
var x = []int{12, 20, 30}
```

Using `[...]` makes an array. Using `[]` makes a slice. `nil` in Go has no type, can be assigned or compared against values of different types.

Built-in functions:

- `len` - `len(x)` - `len` of a nil slice returns 0
- `append` - `x = append(x, 10, 20, 30)`, `x = append(x, y...)` (`...` used to expand the source slice)
- `cap` - `cap(v)` - returns the current capacity of a slice
- `make` - `x := make([]int, 5)` - it allows us to specify the type, length, and optionally, the capacity
- `copy` - `numberOfElementsCopied := copy(destination, source)` - if you need to create a copy that is independent of the original

Go is _Call by value_ - every time you pass a parameter to a function, Go makes a copy of the value that is passed in.

When a slice grows via `append`, Go increases the slice by more than one when it runs out of capacity. It doubles the size when the capacity is less than 1024 and then grows by at least 25% afterward. `make` and `append` are the preferred way of declaring slices.

Slicing: `[startingOffset:endingOffset]`. In Go, when you take a slice from a slice, you are not making a copy of the data; instead, you have 2 variables sharing the same memory. Avoid modifying slices after they have been sliced or if they were produced by slicing. Use the full slice expression to prevent `append` from sharing capacity between slices (`x[:2:2]`, `x[2:4:4]`). The last position indicates the last position in the parent slice's capacity that is available for the subslice. Subtract the starting offset from this number to get the subslice's capacity.

An array can be converted to a slice by using a slicing expression.
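A small sketch of the sharing problem and of the full slice expression described above (the values are chosen only for illustration):

```go
package main

import "fmt"

func main() {
	x := []int{1, 2, 3, 4}

	y := x[:2]        // y shares x's backing array and its spare capacity
	y = append(y, 30) // overwrites x[2], because y's capacity extends into x
	fmt.Println(x)    // [1 2 30 4]

	z := x[:2:2]      // full slice expression: z's capacity is capped at 2
	z = append(z, 99) // forces a new backing array, so x is untouched
	fmt.Println(x, z) // [1 2 30 4] [1 2 99]
}
```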
Go allows us to use slicing notation to make substrings. Be very careful when doing so. Strings are immutable, so they don't have the modification problem, BUT a string is composed of _bytes_, and a code point in UTF-8 can be anywhere from one to four bytes long. When dealing with languages other than English or with emojis, you run into code points that are multiple bytes long. UTF-8 is very clever: in the worst case it uses 4 bytes, in the best case only one. The only downside is that you cannot randomly access a string encoded with UTF-8.

MAPS - dictionary/hash map. Declaration: `map[keyType]valueType`.

- maps automatically grow as you add key-value pairs
- if you know how many key-value pairs you plan to insert into a map, you can use `make` to create a map with a specific initial size
- passing a _map_ to the _len_ function tells you the number of key-value pairs in a _map_
- the zero value for a map is nil
- maps are not comparable

Go doesn't allow you to define your own hash algorithm.

Comma ok idiom - a boolean value: if `ok` is true, the key is present; if `ok` is false, the key is not present (see the sketch at the end of this chapter).

- `delete` - `delete(m, key)` (remove a key-value pair from the map)

Go does not include sets, but you can use a map to simulate some of its features. Set simulation:

```go
set := map[int]bool{}
```

If you need sets that provide operations like union, intersection, and subtraction - write one yourself or use a 3rd-party library.

STRUCT - when you have related data that you want to group together.

```
type person struct {
    name string
    age  int
    pet  string
}

julia := person{
    "Julia",
    30,
    "cat",
}

beth := person{
    name: "Beth",
}
```

Anonymous struct - without giving it a name first:

```
var person struct {
    name string
    age  int
    pet  string
}

person.name = "Bob"
```

Whether a struct is comparable depends on the struct's fields. Structs that are entirely composed of comparable types are comparable, those with slice or map fields are not. Unlike Python, there are no methods that can be overridden to redefine equality.

Go allows you to perform a type conversion from one struct to another _if the fields of both structs have the same names, order, and types_.
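A short sketch of the comma ok idiom and `delete` from the maps section above (the map contents are illustrative):

```go
package main

import "fmt"

func main() {
	totals := map[string]int{"hello": 5, "world": 0}

	v, ok := totals["world"]
	fmt.Println(v, ok) // 0 true -- the key exists; its value just happens to be 0

	v, ok = totals["goodbye"]
	fmt.Println(v, ok) // 0 false -- the zero value, the key is absent

	delete(totals, "hello")  // removes the pair; deleting a missing key is a no-op
	fmt.Println(len(totals)) // 1
}
```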
## Chapter 4: Blocks, Shadows, and Control Structures

BLOCKS - Go lets you declare variables in lots of places. You can declare them outside of functions, as the parameters to functions, and as local variables within functions. Each place where a declaration occurs is called a _block_. Variables, constants, types and functions declared outside of any functions are placed in the package block.

`:=` reuses variables that are declared in the current block. When using `:=`, make sure that you don't have any variables from an outer scope on the left-hand side, unless you intend to shadow them. Sometimes avoid using `:=` because it may make it unclear what variables are being used. There is a `shadow` linter - a tool to detect shadowing.

The Universe Block - the block that contains all other blocks. Never redefine any of the identifiers in the universe block (`true`, `false`, `string`, `int`, ...). If you accidentally do so, you will get some very strange behavior.

IF - Go doesn't require you to put parentheses around the condition. You can declare variables that are scoped to the condition and to both the `if` and `else` blocks.

```go
if n := rand.Intn(10); n == 10
```

Having this special scope is very handy, it lets you create variables that are available only where they are needed. Once the series of `if/else` statements ends, `n` is undefined.

FOR - Go has 4 formats of `for`:

- C-style `for`
- condition-only `for`
- infinite `for`
- `for-range`

When iterating over a `map`, the iteration order is randomized (although some runs may be identical). This is a security feature. In older Go versions, the iteration order was usually the same. People used to write code that assumed the order was fixed, and this would break at weird times. The randomized read order prevents a _Hash DoS_ attack.

When iterating over a string with a `for-range` loop, it iterates over the runes, not the bytes. Whenever a `for-range` loop encounters a multibyte rune in a string, it converts the UTF-8 representation into a single 32-bit number and assigns it to the value. Every time the `for-range` loop iterates over your compound type, it copies the value from the compound type to the value variable.

SWITCH - like an `if` statement, you can declare a variable that is scoped to all the branches of the switch statement. If you have a `switch` statement inside a `for` loop, and you want to break out of the `for` loop, put a label on the `for` statement, and then do `break label`. If you don't use a label, Go assumes that you want to break out of the case.

You can create a "blank switch" - this allows you to use any boolean comparison for each case (see the sketch at the end of this chapter). There isn't a lot of difference between a series of `if/else` statements and a blank `switch`. Favor blank `switch` statements over `if/else` chains when you have multiple related cases. Using a `switch` makes the comparisons more visible and reinforces that they are a related set of concerns.

GOTO - Traditionally `goto` was dangerous because it could jump to nearly anywhere in a program (jump into/out of a loop, skip variable definitions, or into the middle of a set of statements in an `if`). This made it difficult to understand what a goto-using program did. Go has a `goto` statement (most modern languages don't). You should still do what you can to avoid using it. Go forbids jumps that skip over variable declarations and jumps that go into an inner or parallel block.
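A minimal sketch of a blank `switch` (the word-length cases here are illustrative):

```go
package main

import "fmt"

func main() {
	words := []string{"hi", "salutations", "hello"}
	for _, word := range words {
		switch wordLen := len(word); { // blank switch: no value to compare against
		case wordLen < 5:
			fmt.Println(word, "is a short word!")
		case wordLen > 10:
			fmt.Println(word, "is a long word!")
		default:
			fmt.Println(word, "is exactly the right length.")
		}
	}
}
```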
Just like in many other languages, functions in Go are values. Any function that has the exact same number and types of parameters and return values meets the type signature.

Anonymous functions - they don't have a name. You don't have to assign them to a variable; you can write them inline and call them immediately.

Functions declared inside functions are called _closures_. This is a computer science word that means that functions declared inside of functions are able to access and modify variables declared in the outer function. Not only can you use a closure to pass some function state to another function, you can also return a closure from a function.

`defer` - used to release resources. Programs often create temporary resources, like files or network connections, that need to be cleaned up. You can `defer` multiple closures in a Go function. They run in last-in, first-out order - the last `defer` registered runs first. In Go, _defer_ statements delay the execution of a function, method, or anonymous function until the surrounding function returns. In other words, the arguments of a deferred call are evaluated immediately, but the call doesn't execute until the surrounding function returns.

A common pattern in Go is for a function that allocates a resource to also return a closure that cleans up the resource.

Empirical Software Engineering:

> Of... eleven proposed characteristics, only two markedly influence complexity growth: the nesting depth and the lack
> of structure.

Go is _Call By Value_ - when you supply a variable for a parameter to a function, Go always makes a copy of the value of the variable. Every type in Go is a value type. It is just that sometimes the value is a pointer (map, slice).

## Chapter 6: Pointers

A pointer - a variable that holds the location in memory where a value is stored. Every variable is stored in one or more contiguous memory locations - _addresses_.

- `&` - the _address_ operator, returns the address of the memory location where the value is stored.
- `*` - the _indirection_ operator, returns the pointed-to value.

Example pointer **type**: `*int`

Before de-referencing a pointer, you must make sure that the pointer is non-nil. Your program will panic if you attempt to de-reference a _nil_ pointer.

Java, Python, JavaScript, and Ruby are pass-by-value (values passed to functions are copies) - just like Go. However, every instance of a class in these languages is implemented as a pointer, so when a class instance is passed to a function or method, the value being copied is the pointer to the instance.

> Immutable types are safer from bugs, easier to understand, and more ready for change. Mutability makes it harder to
> understand what your program is doing, and much harder to enforce contracts.

The lack of immutable declarations in Go might seem problematic, but the ability to choose between value and pointer parameter types addresses the issue.

Be careful when using pointers in Go. They make it hard to understand data flow and can create extra work for the garbage collector. Rather than populating a struct by passing a pointer to it into a function, have the function instantiate the struct. The only time you should use pointer parameters to modify a variable is when the function expects an interface. You see this pattern when working with JSON.
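A minimal sketch of that guidance, with illustrative names - prefer a function that instantiates and returns the struct over one that populates it through a pointer parameter:

```go
package main

import "fmt"

type person struct {
	name string
	age  int
}

// Preferred: the function instantiates the struct and returns it.
func makePerson(name string, age int) person {
	return person{name: name, age: age}
}

// Avoid: populating a struct through a pointer parameter obscures data flow.
func populatePerson(name string, age int, p *person) {
	p.name = name
	p.age = age
}

func main() {
	p := makePerson("Julia", 30)
	fmt.Println(p)
}
```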
The time to pass a pointer into a function is ~1 ns. Passing a value into a function takes longer as the data gets larger - around 1 ms for ~10 MB of data. So if the data is large enough, there are performance benefits to using a pointer. On the other hand, it does not pay off to use a pointer for small data (< 1 MB); for 100 bytes of data it is roughly 30 ns via a pointer vs 10 ns copying the value.

Pointers indicate mutability - be careful when using this pattern.

Avoid using maps for input or return values (a map is implemented as a pointer to a struct). Rather than passing a map around, use a struct. Passing a slice to a function has even more complicated behavior: any modification to the contents is reflected, but use of _append_ is not reflected. As the only linear data structure, slices are often passed around in Go programs - by default you should assume that a slice is not modified by a function.

Garbage - data that has no more pointers pointing to it. Once there are no more pointers pointing to some data, the memory can be reused. If the memory weren't recovered, the program's memory usage would continue to grow until the computer ran out of RAM. The job of a garbage collector is to automatically detect unused memory and recover it.

The Stack - a consecutive block of memory; allocation is fast and simple; local variables, along with parameters passed into a function, are stored on the stack. To store something on the stack, you have to know exactly how big it is at compile time. When the compiler determines that the data can't be stored on the stack, the data the pointer points to _escapes_ the stack and the compiler stores the data on the heap.

The Heap - memory managed by the garbage collector.

Go's garbage collector favours lower latency (< 500ms, finish each cycle as quickly as possible) over throughput (find the most garbage possible in a single scan). If your program creates a lot of garbage, the garbage collector will not find all the garbage during a cycle, slowing down the collector and increasing memory usage.

Go encourages you to use pointers sparingly. We reduce the workload of the garbage collector by making sure that as much as possible is stored on the stack.

## Chapter 7: Types, Methods, and Interfaces

Go is designed to encourage the best practices that are advocated by software engineers, avoiding inheritance while encouraging composition.

Methods: `func (p Person) String() string` - `(p Person)` is like `self` or `this`, however it is non-idiomatic to call it `self` or `this`. This is called a _receiver_, and it should usually have a short name. Methods cannot be overloaded. You can't add methods to types you don't control.

- If a method modifies the receiver, you _must_ use a pointer receiver
- If a method needs to handle _nil_ instances, you _must_ use a pointer receiver
- If a method doesn't modify the receiver, you _can_ use a value receiver

When a type has any pointer receiver methods, a common practice is to be consistent and use pointer receivers for all methods, even the ones that don't modify the receiver.

Do not write getters/setters. Go encourages you to access a field directly. Reserve methods for business logic.

Defining a user-defined type based on another type makes code clearer by providing a name for a concept and describing the kind of data that is expected (e.g. type `Percentage` vs `int`).

Go doesn't have enumerations; instead it has `iota`, which allows you to assign an increasing value to a set of constants. `iota` makes sense when you care about being able to differentiate between a set of values, and don't particularly care what the value is behind the scenes. If the actual value matters, specify it explicitly.
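A minimal sketch of `iota` (the `MailCategory` name is illustrative) for a set of values where only distinctness matters:

```go
package main

import "fmt"

type MailCategory int

// iota assigns 0, 1, 2, ... to the constants in this block;
// the actual numbers are irrelevant, only distinctness matters.
const (
	Uncategorized MailCategory = iota
	Personal
	Spam
)

func main() {
	fmt.Println(Uncategorized, Personal, Spam) // 0 1 2
}
```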
Embedding - promotes methods on the embedded type to the containing struct. Embedding support is rare in programming languages. Do not mistake embedding for inheritance - they are not the same. If the containing struct has fields/methods with the same name, you need to use the embedded field's type to refer to the obscured fields/methods.

The real star of Go's design - implicit interfaces. An `interface` literal lists all methods that must be implemented by a concrete type to meet the interface. Interfaces are usually named with `er` endings (`io.Reader`, `io.Closer`, `json.Marshaler`, `http.Handler`).

Go blends duck typing and Java's interfaces. Implicit interfaces give the flexibility of changing implementation and make it easier to understand what the code is doing.

> Interfaces specify what callers need. The client code defines the interface to specify what functionality it requires.

**Accept interfaces, return structs.** The business logic invoked by your functions should be invoked via interfaces, but the output of your functions should be a concrete type. Go encourages small interfaces.

Sometimes you need to say that a variable could store any value; Go uses `interface{}` to represent this. It matches every type in Go. However, avoid this: Go was designed as a strongly typed language and attempts to work around this are unidiomatic.

Dependency injection - code should explicitly specify the functionality it needs to perform its task. Implicit interfaces make dependency injection an excellent way to decouple your code.

> "Dependency Injection" is a 25-dollar term for a 5-cent concept. [...] Dependency injection means giving an object its
> instance variables. [...]
> Dependency injection is basically providing the objects that an object needs (its dependencies) instead of having it
> construct them itself. It's a very useful technique for testing, since it allows dependencies to be mocked or stubbed
> out.

Use `Wire` if you think writing dependency injection code by hand is too much work.

Go is not object-oriented, nor functional, nor procedural. It is practical. It borrows concepts from many places with the overriding goal of creating a language that is simple, readable, and maintainable by large teams for many years.

## Chapter 8: Errors

Go handles errors by returning a value of type `error` as the last return value of a function (a convention). The Go compiler requires that all variables must be read. Making errors returned values forces developers to either check and handle error conditions or make it explicit that they are ignoring errors by using an underscore (`_`) for the returned error value.

`errors.New("denominator is 0")` - error messages should not be capitalized, nor should they end with punctuation or a newline. A second option is to create an error with `fmt.Errorf`, which supports formatting verbs, e.g. `fmt.Errorf("denominator %d is invalid", denominator)`.

_Sentinel errors_ - a pattern: errors meant to signal that processing cannot continue due to a problem with the current state. By convention, their names start with `Err`. Be sure you need a sentinel error before you define one. It is part of your public API, and you have committed to it being available in all future backward-compatible releases.

`error` is an interface; you can define your own errors that include additional information for logging or error handling. Even when you define your own custom error types, always use `error` as the return type for the error result. Be sure you don't return an uninitialized instance (`var genErr StatusErr`); instead, explicitly return `nil`.

_Wrapping the error_ - when you preserve an error while adding additional information.
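A minimal sketch of wrapping with `fmt.Errorf` and the `%w` verb (the config-reading scenario is illustrative); `errors.Is`, described next, finds the wrapped error:

```go
package main

import (
	"errors"
	"fmt"
	"os"
)

func readConfig(path string) error {
	if _, err := os.ReadFile(path); err != nil {
		// %w wraps err: the original error is preserved inside the new one.
		return fmt.Errorf("reading config %s: %w", path, err)
	}
	return nil
}

func main() {
	err := readConfig("/no/such/file")
	fmt.Println(err)
	fmt.Println(errors.Is(err, os.ErrNotExist)) // true: found in the chain
}
```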
When you have a series of wrapped errors, it is called an _error chain_. You don't usually call `errors.Unwrap` directly. Instead, you use `errors.Is` and `errors.As` to find a specific wrapped error. If you want to wrap an error with your custom error type, your error type needs to implement the `Unwrap` method.

- `errors.Is` - checks whether the returned error, or any error that it wraps, matches a specific sentinel error instance
- `errors.As` - checks whether the returned error, or any error it wraps, matches a specific type

If there are situations in your programs that are unrecoverable, you can create your own panics. Go provides a way to capture a panic to provide a more graceful shutdown, or to prevent shutdown at all. Reserve `panic` for fatal situations; use `recover` as a way to gracefully handle them. If a program panics, be careful about trying to continue executing after the panic.

## Chapter 9: Modules, Packages, and Imports

A module is the root of a Go library or application, stored in a repository. Modules consist of one or more packages, which give the module organization and structure. A collection of Go source code becomes a module when there is a valid `go.mod` file in its root directory -- `go mod init MODULE_PATH`. `MODULE_PATH` - a globally unique name that identifies your module (e.g. a GitHub link).

Go uses capitalization to determine whether a package-level identifier is visible outside the package where it is declared. Anything you export is part of your package's API. Be sure you want to expose certain things to clients. Document all exported identifiers and keep them backward-compatible.

As a general rule, make the name of the package match the name of the directory that contains it. Package names should be descriptive. Don't repeat the name in a function and its package (`extract.Names` > `extract.ExtractNames`). If your code is small -- keep it in a single package. Introduce packages as the codebase grows.

In case of conflicting names, you can alias an import (`import crand "crypto/rand"`). Usage of `.` (which imports all identifiers into the current package's namespace) is discouraged -- like usage of `*` imports in Python.

Go has its own format for writing comments that are automatically converted into documentation -- the `godoc` format (see the sketch at the end of this chapter's notes). Place the documentation directly above the item being documented. Start the comment with the name of the item. Use a blank comment line to break the comment into multiple paragraphs. Use indenting. `go doc PACKAGE_NAME.IDENTIFIER_NAME` - views the godoc.

When you create a package called `internal`, the exported identifiers are only accessible to the direct parent of `internal` and the sibling packages of `internal`.

You might want to rename or move some identifiers -- to avoid a backward-breaking change, don't remove the original identifiers; provide an alternate name instead (`type Bar = Foo`).

SemVer - semantic versioning: _major_._minor_._patch_:

- `patch` - incremented when fixing a bug
- `minor` - incremented when a new, backward-compatible feature is added
- `major` - incremented when making a change that breaks backward compatibility

The import compatibility rule says that all minor and patch versions of a module must be backward-compatible. If they aren't, it is a bug.

`pkg.go.dev` - a single service that gathers together documentation of Go modules.
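A hedged sketch of those godoc conventions for a hypothetical `extract` package (the names are illustrative, not from the book):

```go
// Package extract pulls structured data out of free-form text.
package extract

// Names returns the unique names found in text, in order of first
// appearance.
//
// A blank comment line, like the one above, starts a new paragraph.
func Names(text string) []string {
	return nil // stub body, for illustration only
}
```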
## Chapter 10: Concurrency in Go

Concurrency - the CS term for breaking up a single process into independent components and specifying how these components safely share data. Most languages provide concurrency via a library that uses OS-level threads that share data by attempting to acquire locks. Go is different; it is based on Communicating Sequential Processes.

_Concurrency is not parallelism._ Concurrency is a tool to better structure the problem you are solving - whether concurrent code runs in parallel depends on the hardware and on whether the algorithm allows it. Whether you should use concurrency depends on how data flows through the steps in your program. Concurrency isn't free; it may come with a huge overhead. That's why concurrent code is used for I/O -- there is a lot of waiting, and we can do different things in the meantime.

`goroutine` - the core concept in Go's concurrency model. Goroutines are lightweight processes managed by the Go runtime. They are faster to create than threads (no system-level resources), and their initial stack is small - smaller than a thread stack -- and grows as needed. Switching between _goroutines_ is faster because it happens within the process.

- process - an instance of a program that is being run
- thread - a process is composed of one or more threads; a thread is a unit of execution that is given some time to run by the OS; threads within a process share resources

Go is able to spawn even tens of thousands of simultaneous _goroutines_. Any function can be launched as a _goroutine_.

Goroutines communicate using _channels_ (`ch := make(chan int)`) - channels are reference types. Use `<-` to interact with a channel (read `<-chan`, write `chan<-`). Each value written to a channel can be read once. If multiple goroutines are reading from the same channel, a value will be read by only one of them.

By default, channels are unbuffered - every write to an open, unbuffered channel causes the writing goroutine to pause until another goroutine reads from the same channel. Buffered channels (`ch := make(chan int, 10)`) buffer a limited number of writes without blocking. Most of the time, use unbuffered channels.

Any time you are reading from a channel that might be closed, use the comma ok idiom to ensure that the channel is still open.

`select` - the control structure for concurrency in Go; it solves the _starvation_ problem. It checks whether any of its cases can be processed, and if several can, it picks one at random, so deadlock is avoided. A `select` is often embedded within a for-loop.
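A minimal runnable sketch tying goroutines, channels, and `select` together (the setup is illustrative):

```go
package main

import "fmt"

func main() {
	ch1 := make(chan int)
	ch2 := make(chan int)
	go func() { ch1 <- 1 }()
	go func() { ch2 <- 2 }()
	for i := 0; i < 2; i++ {
		// select reads from whichever channel is ready;
		// if both are ready, one case is chosen at random.
		select {
		case v := <-ch1:
			fmt.Println("from ch1:", v)
		case v := <-ch2:
			fmt.Println("from ch2:", v)
		}
	}
}
```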
Concurrency practices and patterns:

1. Keep your APIs concurrency-free - never export channels or mutexes in your API.
2. Goroutines, for loops, and varying variables - any time a goroutine uses a variable whose value might change, pass the current value of the variable into the goroutine.
3. Always clean up your goroutines - make sure that each one will eventually exit. If a goroutine doesn't exit, the scheduler will periodically give it time to do nothing.
4. The done channel pattern - provides a way to signal a goroutine that it's time to stop processing. It uses a channel to signal that it is time to exit.
5. Using a cancel function to terminate a goroutine - return a cancellation function alongside the channel.
6. When to use buffered and unbuffered channels - buffered channels are useful when you know how many goroutines you have launched, want to limit the number of goroutines you will launch, or want to limit the amount of work that is queued up.
7. Backpressure - systems perform better when their components limit the amount of work they are willing to perform. We can use a buffered channel and a select statement to limit the number of simultaneous requests in a system.
8. Turning off a case in a select - if one of the cases in a _select_ is reading a closed channel, it will always be successful. Use a `nil` channel to disable a case: set the channel's variable to `nil` and then `continue`.
9. How to time out code - use `case <-time.After(2 * time.Second):`.
10. Using WaitGroups - sometimes a goroutine needs to wait for multiple goroutines to complete their work. If you are waiting for a single goroutine, you can use the done channel pattern that we saw earlier, but if you are waiting on several goroutines, you need to use a `WaitGroup` (see the sketch at the end of this chapter's notes).
11. Running code exactly once - `sync.Once` - a handy type that enables this functionality.
12. Putting our concurrent tools together - by structuring our code with goroutines, channels and select statements, we separate the individual parts to run and complete in any order and cleanly exchange data between the dependent parts.

`mutex` - mutual exclusion; the job of a mutex is to limit the concurrent execution of some code or access to a shared piece of data. This protected part is called the _critical section_.

> Share memory by communicating, do not communicate by sharing memory.

Decision tree - use channels or mutexes:

- If you are coordinating goroutines or tracking a value as it is transformed by a series of goroutines, use channels
- If you are sharing access to a field in a struct, use mutexes
- If you discover a critical performance issue when using channels, and you cannot find any other way to fix the issue, modify your code to use a mutex
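The `WaitGroup` sketch referenced in item 10 above - a minimal, runnable example with illustrative values:

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	var wg sync.WaitGroup
	results := make(chan int, 3) // buffered: we know how many goroutines we launch
	for i := 1; i <= 3; i++ {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			results <- n * n
		}(i)
	}
	wg.Wait()      // block until every goroutine has called Done
	close(results) // safe to close: all writers have finished
	for v := range results {
		fmt.Println(v)
	}
}
```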
## Chapter 11: The Standard Library

Like Python, Go has a "batteries included" philosophy - it provides many of the tools that you need to build an application.

`io` - contains two of the most useful interfaces - `io.Writer` and `io.Reader`.

`time` - two main types used to represent time - `time.Duration` (used to represent a period of time, e.g. `2 * time.Hour`) and `time.Time` (used to represent a moment in time). It is possible to extract the month, day, year, ... from a `Time`. Most OSes keep track of two different sorts of time:

- the wall clock - the current time
- the monotonic clock - counts up from the time the computer was booted

`encoding/json` - Go includes support for converting Go data types to and from JSON.

- marshalling - Go data type -> encoding
- unmarshalling - encoding -> Go data type

We specify the rules for processing our JSON with _struct tags_, strings that are written after the fields in a struct (`tagName:"tagValue"`, e.g. `json:"id"`).

`net/http` - a production-quality HTTP/2 client and server.

- Client - makes HTTP requests and receives HTTP responses
- Server - responsible for listening for HTTP requests

Even though Go provides the server, use idiomatic third-party modules to enhance it.

## Chapter 12: The Context

Servers need a way to handle metadata on individual requests. Go uses a construct called the context. Context - an instance that meets the Context interface. An empty context is a starting point: each time you add metadata to the context, you do so by wrapping the existing context using one of the factory functions in the context package.

Cancellation - imagine a request that spawns several goroutines, each one calling a different HTTP service. If one service returns an error that prevents you from returning a valid response, there is no point in continuing to process the other goroutines. In Go this is called _cancellation_.

There are 4 things a server can do to manage its load:

- Limit simultaneous requests
- Limit how many requests are queued waiting to run
- Limit how long a request can run
- Limit the resources a request can use

Go provides tools to handle the first three: the first two are handled by limiting the number of goroutines, and the context provides a way to control how long a request runs. The context also provides a way to pass per-request metadata through your program.

## Chapter 13: Writing Tests

Go includes testing support as part of its standard library. The `testing` package provides the types and functions to write tests, while the `go test` tool runs your tests and generates reports.

Go tests are placed in the same directory and the same package as the production code. Tests are able to access and test unexported functions and variables. If you want to test just the public API, Go has a special convention for this: use `packagename_test` for the package name.

Every test lives in a file whose name ends with `_test.go`. Test functions start with the word `Test` and take a single parameter of type `*testing.T` (see the sketch at the end of this chapter's notes). It is possible to write set-up and tear-down code.

Use `go-cmp` (a third-party module) to compare two instances of a compound type.

Adding the `-cover` flag to the `go test` command calculates coverage information and includes a summary in the test output. `-coverprofile=c.out` saves the coverage info to a file. `-html=c.out` generates an HTML representation of your source code coverage.

> Code coverage is necessary, but it is not sufficient. You can have 100% code coverage and still have bugs in your
> code.

> When your code depends on abstractions, it is easier to write unit tests.

A stub returns a canned value for a given input, whereas a mock validates that a set of calls happen in the expected order with the expected inputs. The `httptest` package makes it easier to stub HTTP services.

Even though `httptest` provides a way to avoid testing external services, you should still write _integration_ tests - automated tests that connect to other services. These validate that your understanding of the service's APIs is correct. The challenge is figuring out how to group your automated tests - you want to run integration tests only when the support environment is present. Also, integration tests tend to be slower than unit tests, so they usually run less frequently.

Go includes a _race checker_ - it helps to find accidental references to a variable from two different goroutines without acquiring a lock. It is not guaranteed to find every single data race in your code, but if it finds one, you should put proper locks around what it finds. Do not solve race conditions by inserting "sleeps" into the code.
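A minimal sketch of those testing conventions - a hypothetical `Add` function and its test (in a real package the test would live in its own `_test.go` file):

```go
package adder

import "testing"

func Add(a, b int) int {
	return a + b
}

// TestAdd follows the conventions: it starts with Test
// and takes a single *testing.T parameter.
func TestAdd(t *testing.T) {
	if got := Add(2, 3); got != 5 {
		t.Errorf("Add(2, 3) = %d, want 5", got)
	}
}
```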
## Chapter 14: Here There Be Dragons: Reflect, Unsafe, and Cgo

Go is a safe language, but sometimes your Go programs need to venture out into less defined areas.

Reflection allows us to examine types at runtime. It also provides the ability to examine, modify, and create variables, functions, and structs at runtime.

- `database/sql` - uses reflection to send requests to databases and read data back
- `text/template` and `html/template` - use reflection to process the values that are passed to the templates
- `fmt` - uses reflection to detect the type of the provided parameters
- `errors` - uses reflection to implement `errors.Is` and `errors.As`
- `sort` - uses reflection to implement functions that sort and evaluate slices of any type

Most of these examples have one thing in common - they involve accessing and formatting data that is being imported into or exported out of a Go program.

The `reflect` package is built around 3 core concepts:

- types - `reflect.TypeOf` returns a value of type `reflect.Type`, which represents the type of the variable passed into the function
- kinds - the `Kind` method on `reflect.Type` returns a value of type `reflect.Kind`, a constant that says what the type is made of - a slice, a map, a pointer, a struct, an interface, an array, a function, an int, ...
- values - we can use `reflect.ValueOf` to create a `reflect.Value` instance that represents the value of a variable

Other use cases:

- use reflection to check if an interface's value is nil
- use reflection to write a data marshaller
- use reflection to automate repetitive tasks, e.g. create a new function without writing repetitive code

While reflection is essential when converting data at the boundaries of Go, be careful using it in other situations.

`unsafe` - allows you to manipulate memory. Very small and very odd. There are 2 common patterns in `unsafe` code:

- conversion between 2 types of variables that are normally not convertible
- reading/modifying the bytes in a variable

The majority of _unsafe_ usages were motivated by integration with operating systems and C code. Developers also frequently use _unsafe_ to write more efficient Go code. The _unsafe_ package is powerful and low-level! Avoid using it unless you know what you are doing and you need the performance improvements that it provides.

Nearly every programming language provides a way to integrate with C libraries. Go calls its FFI (Foreign Function Interface) to C `cgo`. `cgo` is for integration, not performance. `cgo` isn't fast, and it is not easy to use for nontrivial programs. The only reason to use `cgo` is if there is a C library that you must use and there is no suitable Go replacement.

## Chapter 15: A Look at the Future: Generics in Go

Generics reduce repetitive code and increase type safety. Generics is the concept that it is sometimes useful to write functions where the specific type of a parameter or field is specified when it is used. Many common algorithms, such as map, reduce, and filter, had to be reimplemented for different types.

> Properly written, Go is boring... well-written Go programs tend to be straightforward and sometimes a bit repetitive.
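A sketch (not from the book) of what generics enable - a `Map` helper written once for any element types:

```go
package main

import "fmt"

// Map applies f to every element of s. Before generics, this had to be
// rewritten for each pair of element types.
func Map[T, U any](s []T, f func(T) U) []U {
	out := make([]U, 0, len(s))
	for _, v := range s {
		out = append(out, f(v))
	}
	return out
}

func main() {
	doubledAsStrings := Map([]int{1, 2, 3}, func(i int) string {
		return fmt.Sprint(i * 2)
	})
	fmt.Println(doubledAsStrings) // [2 4 6]
}
```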
================================================
FILE: books/hands-on-ml.md
================================================

[go back](https://github.com/pkardas/learning)

# Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow: Concepts, Tools, and Techniques to Build Intelligent Systems

Book by Aurelien Geron

[TOC]

TODO: *Re-read Part I.*

## Chapter 10: Introduction to Artificial Neural Networks with Keras

ANNs - Artificial Neural Networks - inspired by the networks of biological neurons, they have gradually become quite different from their biological cousins.

ANNs were introduced in 1943 by McCulloch and Pitts - a simplified computational model of how biological neurons might work together in animal brains to perform complex computation using propositional logic. McCulloch and Pitts proposed an artificial neuron that has one or more binary inputs (on/off) and one binary output. The artificial neuron activates its output when more than a certain number of its inputs are active. They showed that even such a simplified model is capable of performing various logical computations.

The Perceptron - one of the simplest ANN architectures, invented in 1957. It is based on a slightly different artificial neuron - the threshold logic unit (TLU), or linear threshold unit. The inputs and outputs are numbers (instead of binary on/off values), and each input connection is associated with a weight. The TLU computes a weighted sum of its inputs, then applies a step function to that sum and outputs the result. The most commonly used step function is the Heaviside step function. A single TLU can be used for simple linear binary classification.

A perceptron is composed of a single layer of TLUs, each TLU connected to all inputs (when all the neurons in a layer are connected to every neuron in the previous layer, the layer is called a fully connected layer). The inputs of the perceptron are fed to special passthrough neurons in the input layer. An extra bias feature is generally added (a neuron that always outputs 1). A perceptron with 2 inputs and 3 outputs can classify instances simultaneously into three different binary classes - a multi-output classifier.

How is a perceptron trained? "Cells that fire together, wire together" - the connection weight between 2 neurons tends to increase when they fire simultaneously (Hebb's rule). The perceptron is fed one example at a time; when it outputs a wrong answer, it reinforces the connection weights from the inputs that would have contributed to the correct answer. In fact, a single perceptron is similar to an SGDClassifier.

Back-propagation training algorithm - it is Gradient Descent using an efficient technique for computing the gradients automatically, in just 2 passes through the network - one forward, one backward. It can find out how each connection weight and each bias term should be tweaked in order to reduce the error. In other words: for each training instance, the back-propagation algorithm first makes a prediction (forward pass) and measures the error, then goes through each layer in reverse to measure the error contribution from each connection (reverse pass), and finally tweaks the connection weights to reduce the error (Gradient Descent step).

When building an MLP for regression, you don't want to use any activation function for the output neurons, so they are free to output any range of values. If the output needs to always be positive, ReLU can be used in the output layer. The loss function to use during training is typically the mean squared error, but if there are many outliers in the training set, the mean absolute error might be a better choice. An MLP can also be used for classification.

TensorFlow 2 adopted Keras' high-level API + introduced some additional functionalities.

**Sequential API** - the simplest kind of Keras model, for neural networks that are just composed of a single stack of layers connected sequentially. Flatten - a preprocessing layer whose role is to convert each input into a 1D array. Once the model is defined, it needs to be compiled - you need to specify the loss function and optimiser to use; optionally, a list of metrics can be passed.
Then the model can be trained. If the training set is very skewed, with some classes being overrepresented and others underrepresented, it is useful to set the class_weight argument when calling the fit method.

If you are not satisfied with the model's performance, adjust the hyperparameters if longer training is not bringing any additional benefit. The model estimates probabilities per class.

When layers are created, they are called like functions -> `keras.layers.Dense(30)(prev_layer)` - this is why it is called the **Functional API**; this is the way of telling Keras how to join layers. A model can have multiple inputs and multiple outputs, depending on the task.

The Sequential API and the Functional API are declarative; for an imperative programming style there is the **Subclassing API**. Simply subclass the `Model` class, create layers in the constructor and use them to perform computations in the `call` method. The Subclassing API is very limited: it does not allow viewing the model's summary, and Keras cannot inspect the model ahead of time. So the Sequential and Functional APIs are preferred.

It is possible to save and load a Keras model to/from disk. Keras will use the HDF5 format to save the model's architecture and the values of all the model parameters for every layer (weights and biases). When training an enormous model, it is a good idea to save checkpoints at regular intervals during training to avoid losing everything if the computer crashes. In order to make checkpoints, you have to use callbacks.

================================================ FILE: books/head-first-design-patterns/ch_01_strategy.py ================================================ class FlyBehavior: def fly(self) -> None: raise NotImplementedError class QuackBehavior: def quack(self) -> None: raise NotImplementedError class Duck: def __init__(self, fly_behavior: FlyBehavior, quack_behavior: QuackBehavior) -> None: self.fly_behavior = fly_behavior self.quack_behavior = quack_behavior def perform_fly(self) -> None: self.fly_behavior.fly() def perform_quack(self) -> None: self.quack_behavior.quack() def display(self) -> None: raise NotImplementedError class FlyWithWings(FlyBehavior): def fly(self) -> None: print("I am using wings!") class FlyNoWay(FlyBehavior): def fly(self) -> None: print("I am not flying.") class Quack(QuackBehavior): def quack(self) -> None: print("QUACK") class Squeak(QuackBehavior): def quack(self) -> None: print("SQUEAK") class MuteQuack(QuackBehavior): def quack(self) -> None: print("") class MallardDuck(Duck): def __init__(self) -> None: super().__init__(FlyWithWings(), Quack()) def display(self) -> None: print("Looks like a mallard.") duck = MallardDuck() duck.display() duck.perform_fly() duck.perform_quack() ================================================ FILE: books/head-first-design-patterns/ch_02_observer.py ================================================ class Observer: def update(self) -> None: raise NotImplementedError class Subject: def register_observer(self, observer: Observer) -> None: raise NotImplementedError def remove_observer(self, observer: Observer) -> None: raise NotImplementedError def notify_observers(self) -> None: raise NotImplementedError class DisplayElement: def display(self) -> None: raise NotImplementedError class WeatherData(Subject): def __init__(self): self._observers = [] self.temperature = 0.0 self.humidity = 0.0 self.pressure = 0.0 def register_observer(self, observer: Observer) -> None: self._observers.append(observer) def remove_observer(self, observer: Observer) -> None:
self._observers.remove(observer) def notify_observers(self) -> None: for observer in self._observers: observer.update() def set_measurements(self, temperature: float, humidity: float, pressure: float) -> None: self.temperature = temperature self.humidity = humidity self.pressure = pressure self.notify_observers() class CurrentConditionsDisplay(Observer, DisplayElement): def __init__(self, weather_data: WeatherData): self._temperature = 0.0 self._humidity = 0.0 self._weather_data = weather_data self._weather_data.register_observer(self) def display(self) -> None: print(f"Current conditions: {self._temperature}°C, {self._humidity}%") def update(self) -> None: self._temperature = self._weather_data.temperature self._humidity = self._weather_data.humidity self.display() class AvgTempDisplay(Observer, DisplayElement): def __init__(self, weather_data: WeatherData): self._temperature = [] self._weather_data = weather_data self._weather_data.register_observer(self) def display(self) -> None: print(f"Average temperature: {sum(self._temperature) / len(self._temperature)}°C") def update(self) -> None: self._temperature.append(self._weather_data.temperature) self.display() weather_data = WeatherData() current_display = CurrentConditionsDisplay(weather_data) forecast_display = AvgTempDisplay(weather_data) weather_data.set_measurements(23.0, 68.1, 1018.0) weather_data.set_measurements(24.2, 70.4, 1019.2) weather_data.set_measurements(25.8, 71.2, 1018.4) ================================================ FILE: books/head-first-design-patterns/ch_03_decorator.py ================================================ class Beverage: @property def description(self) -> str: return self.__class__.__name__ @property def cost(self) -> float: raise NotImplementedError class CondimentDecorator(Beverage): def __init__(self, beverage: Beverage): self._beverage = beverage @property def description(self) -> str: return f"{self._beverage.description}, {super(CondimentDecorator, self).description}" @property def cost(self) -> float: raise NotImplementedError class Espresso(Beverage): @property def cost(self) -> float: return 1.99 class HouseBlend(Beverage): @property def cost(self) -> float: return 0.89 class Mocha(CondimentDecorator): @property def cost(self) -> float: return self._beverage.cost + 0.20 class Soy(CondimentDecorator): @property def cost(self) -> float: return self._beverage.cost + 0.15 beverage = Espresso() beverage = Mocha(beverage) beverage = Mocha(beverage) beverage = Soy(beverage) print(f"${beverage.cost} for '{beverage.description}'") ================================================ FILE: books/head-first-design-patterns/ch_04_factory.py ================================================ class Ingredient: def __init__(self): print(self.__class__.__name__) class ThinCrustDough(Ingredient): pass class ThickCrustDough(Ingredient): pass class MarinaraSauce(Ingredient): pass class PlumTomatoSauce(Ingredient): pass class MozzarellaCheese(Ingredient): pass class ReggianoCheese(Ingredient): pass class Garlic(Ingredient): pass class Onion(Ingredient): pass class Mushroom(Ingredient): pass class SlicedPepperoni(Ingredient): pass class FreshClams(Ingredient): pass class FrozenClams(Ingredient): pass class PizzaIngredientFactory: def create_dough(self): raise NotImplementedError def create_sauce(self): raise NotImplementedError def create_cheese(self): raise NotImplementedError def create_veggies(self): raise NotImplementedError def create_pepperoni(self): raise NotImplementedError def create_clam(self): raise 
NotImplementedError class NYPizzaIngredientFactory(PizzaIngredientFactory): def create_dough(self): return ThinCrustDough() def create_sauce(self): return MarinaraSauce() def create_cheese(self): return ReggianoCheese() def create_veggies(self): return [Garlic(), Onion()] def create_pepperoni(self): return SlicedPepperoni() def create_clam(self): return FreshClams() class ChicagoPizzaIngredientFactory(PizzaIngredientFactory): def create_dough(self): return ThickCrustDough() def create_sauce(self): return PlumTomatoSauce() def create_cheese(self): return MozzarellaCheese() def create_veggies(self): return [Garlic(), Mushroom()] def create_pepperoni(self): return SlicedPepperoni() def create_clam(self): return FrozenClams() class Pizza: name = ... def __init__(self, ingredient_factory: PizzaIngredientFactory): self._ingredient_factory = ingredient_factory def prepare(self) -> None: raise NotImplementedError def bake(self) -> None: print("Bake for 25 minutes at 350") def cut(self) -> None: print("Cutting the pizza into diagonal slices") def box(self) -> None: print("Place the pizza in official PizzaStore box") class CheesePizza(Pizza): def prepare(self) -> None: print(f"Preparing {self.name}") self._ingredient_factory.create_dough() self._ingredient_factory.create_sauce() self._ingredient_factory.create_cheese() class ClamPizza(Pizza): def prepare(self) -> None: print(f"Preparing {self.name}") self._ingredient_factory.create_dough() self._ingredient_factory.create_sauce() self._ingredient_factory.create_cheese() self._ingredient_factory.create_clam() class PizzaStore: def order_pizza(self, pizza_type: str) -> Pizza: pizza = self.create_pizza(pizza_type) pizza.prepare() pizza.bake() pizza.cut() pizza.box() return pizza # Factory Method: def create_pizza(self, pizza_type: str) -> Pizza: raise NotImplementedError class NYPizzaStore(PizzaStore): def create_pizza(self, pizza_type: str) -> Pizza: ingredient_factory = NYPizzaIngredientFactory() match pizza_type: case "cheese": pizza = CheesePizza(ingredient_factory) pizza.name = "NY Style Sauce and Cheese Pizza" case "clam": pizza = ClamPizza(ingredient_factory) pizza.name = "NY Style Sauce and Clam Pizza" case _: raise RuntimeError("Unknown pizza type") return pizza class ChicagoPizzaStore(PizzaStore): def create_pizza(self, pizza_type: str) -> Pizza: ingredient_factory = ChicagoPizzaIngredientFactory() match pizza_type: case "cheese": pizza = CheesePizza(ingredient_factory) pizza.name = "Chicago Style Deep Dish Cheese Pizza" case "clam": pizza = ClamPizza(ingredient_factory) pizza.name = "Chicago Style Deep Dish Clam Pizza" case _: raise RuntimeError("Unknown pizza type") return pizza ny_store = NYPizzaStore() ny_store.order_pizza("cheese") chicago_store = ChicagoPizzaStore() chicago_store.order_pizza("cheese") ================================================ FILE: books/head-first-design-patterns/ch_05_singleton.py ================================================ class ChocolateBoiler: _instance = None def __new__(cls): if not cls._instance: cls._instance = super(ChocolateBoiler, cls).__new__(cls) return cls._instance boiler_0 = ChocolateBoiler() boiler_1 = ChocolateBoiler() print(f"#0: {boiler_0}") print(f"#1: {boiler_1}") print(f"Are they the same object? {boiler_0 is boiler_1}") # Implementation using variable - instantiated on module import: class ChocolateBoiler: pass chocolate_boiler = ChocolateBoiler() print(f"Are they the same object? 
{chocolate_boiler is chocolate_boiler}") # Implementation using function - using 'attr': def get_chocolate_boiler() -> ChocolateBoiler: if not hasattr(get_chocolate_boiler, "instance"): setattr(get_chocolate_boiler, "instance", ChocolateBoiler()) return getattr(get_chocolate_boiler, "instance") print(f"Are they the same object? {get_chocolate_boiler() is get_chocolate_boiler()}") # Implementation using function - using variable: _chocolate_boiler = None def get_chocolate_boiler() -> ChocolateBoiler: global _chocolate_boiler if not _chocolate_boiler: _chocolate_boiler = ChocolateBoiler() return _chocolate_boiler print(f"Are they the same object? {get_chocolate_boiler() is get_chocolate_boiler()}") ================================================ FILE: books/head-first-design-patterns/ch_06_command.py ================================================ from typing import List class Device: @property def name(self) -> str: return self.__class__.__name__ def on(self) -> None: print(f"{self.name} was turned on") def off(self) -> None: print(f"{self.name} was turned off") class Light(Device): pass class Tv(Device): pass class Stereo(Device): def __init__(self) -> None: self.volume = 0 def set_cd(self) -> None: print(f"{self.name} CD set") def set_volume(self, volume: int) -> None: print(f"{self.name} Volume set to {volume}") self.volume = volume class Command: def execute(self) -> None: raise NotImplementedError def undo(self) -> None: raise NotImplementedError class NoCommand(Command): def execute(self) -> None: pass def undo(self) -> None: pass class MarcoCommand(Command): def __init__(self, commands: List[Command]): self._commands = commands def execute(self) -> None: for command in self._commands: command.execute() def undo(self) -> None: for command in self._commands[::-1]: command.undo() class DeviceOnCommand(Command): def __init__(self, device: Device) -> None: self._device = device def execute(self) -> None: self._device.on() def undo(self) -> None: self._device.off() class DeviceOffCommand(Command): def __init__(self, device: Device) -> None: self._device = device def execute(self) -> None: self._device.off() def undo(self) -> None: self._device.on() class StereoVolumeUpCommand(Command): def __init__(self, stereo: Stereo) -> None: self._stereo = stereo def execute(self) -> None: self._stereo.set_volume(stereo.volume + 1) def undo(self) -> None: self._stereo.set_volume(stereo.volume - 1) class RemoteControl: def __init__(self): self._on_commands = [NoCommand()] * 7 self._off_commands = [NoCommand()] * 7 self._undo_commands = [] def set_command(self, slot: int, on_command: Command, off_command: Command) -> None: self._on_commands[slot] = on_command self._off_commands[slot] = off_command def on_button_pushed(self, slot: int) -> None: self._on_commands[slot].execute() self._undo_commands.append(self._on_commands[slot]) def off_button_pushed(self, slot: int) -> None: self._off_commands[slot].execute() self._undo_commands.append(self._off_commands[slot]) def undo_button_pushed(self) -> None: if not self._undo_commands: return self._undo_commands.pop().undo() light = Light() tv = Tv() stereo = Stereo() light_on_command, light_off_command = DeviceOnCommand(light), DeviceOffCommand(light) tv_on_command, tv_off_command = DeviceOnCommand(tv), DeviceOffCommand(tv) stereo_on_command, stereo_off_command = DeviceOnCommand(stereo), DeviceOffCommand(stereo) volume_up_command = StereoVolumeUpCommand(stereo) party_on_command = MarcoCommand([light_on_command, tv_on_command, stereo_on_command, 
volume_up_command]) party_off_command = MarcoCommand([light_on_command, tv_on_command, stereo_off_command]) remote = RemoteControl() remote.set_command(0, light_on_command, light_off_command) remote.set_command(1, tv_on_command, tv_off_command) remote.set_command(2, stereo_on_command, stereo_off_command) remote.set_command(3, party_on_command, party_off_command) remote.on_button_pushed(1) remote.on_button_pushed(3) remote.undo_button_pushed() ================================================ FILE: books/head-first-design-patterns/ch_07_adapter.py ================================================ class Duck: def quack(self) -> None: raise NotImplementedError def fly(self) -> None: raise NotImplementedError class Turkey: def gobble(self) -> None: raise NotImplementedError def fly(self) -> None: raise NotImplementedError class WildTurkey(Turkey): def gobble(self) -> None: print("Gobble Gobble") def fly(self) -> None: print("I am flying a short distance") class TurkeyAdapter(Duck): def __init__(self, turkey: Turkey): self._turkey = turkey def quack(self) -> None: self._turkey.gobble() def fly(self) -> None: self._turkey.fly() # We ran out of ducks, so we use turkeys: turkey = WildTurkey() turkey_adapter = TurkeyAdapter(turkey) turkey_adapter.quack() ================================================ FILE: books/head-first-design-patterns/ch_07_facade.py ================================================ from unittest.mock import Mock class HomeTheaterFacade: def __init__(self, amplifier, tuner, projector, lights, screen, player, popper): self._amplifier = amplifier self._tuner = tuner self._projector = projector self._lights = lights self._screen = screen self._player = player self._popper = popper # Wrap complex behavior into single method: def watch_movie(self, movie): self._popper.on() self._popper.pop() self._lights.dim(10) self._screen.down() self._projector.on() self._amplifier.on() self._amplifier.set_volume(20) self._player.on() self._player.play(movie) home_theater = HomeTheaterFacade(*([Mock()] * 7)) home_theater.watch_movie("Joker") ================================================ FILE: books/head-first-design-patterns/ch_08_template_method.py ================================================ class CaffeineBeverage: def prepare_recipe(self) -> None: self._boil_water() self._brew() self._pour_in_cup() self._add_condiments() def _boil_water(self) -> None: print("Boiling water") def _pour_in_cup(self) -> None: print("Pouring in a cup") def _brew(self) -> None: raise NotImplementedError def _add_condiments(self) -> None: raise NotImplementedError class Tea(CaffeineBeverage): def _brew(self) -> None: print("Steeping the tea") def _add_condiments(self) -> None: print("Adding Lemon") class Coffee(CaffeineBeverage): def _brew(self) -> None: print("Dripping Coffee through filter") def _add_condiments(self) -> None: print("Adding Sugar and Milk") Coffee().prepare_recipe() Tea().prepare_recipe() ================================================ FILE: books/head-first-design-patterns/ch_09_composite.py ================================================ from __future__ import annotations from abc import ABC from dataclasses import dataclass class MenuComponent: def add(self, menu_component: MenuComponent): raise NotImplementedError def remove(self, menu_component: MenuComponent): raise NotImplementedError def get_child(self, i: int): raise NotImplementedError def print(self): raise NotImplementedError @dataclass class MenuItem(MenuComponent, ABC): name: str description: str vegetarian: bool price: float 
def print(self): print(f"{self.name}, {self.price}, {self.description}") class Menu(MenuComponent): def __init__(self, name: str): self._name = name self._menu_components = [] def add(self, menu_component: MenuComponent): self._menu_components.append(menu_component) def remove(self, menu_component: MenuComponent): self._menu_components.remove(menu_component) def get_child(self, i: int): return self._menu_components[i] def print(self): print(self._name) for menu_component in self._menu_components: menu_component.print() class Waitress: def __init__(self, menu_component: MenuComponent): self._menu_component = menu_component def print_menu(self): self._menu_component.print() breakfast_menu = Menu("BREAKFAST") dinner_menu = Menu("DINNER") dessert_menu = Menu("DESSERT") all_menus = Menu("ALL MENUS") all_menus.add(breakfast_menu) all_menus.add(dinner_menu) dinner_menu.add(MenuItem("Pasta", "Pasta with marinara Sauce", True, 3.89)) dinner_menu.add(dessert_menu) dessert_menu.add(MenuItem("Apple Pie", "Apple pie with a flaky crust, topped with vanilla ice cream", True, 1.59)) Waitress(all_menus).print_menu() ================================================ FILE: books/head-first-design-patterns/ch_09_iterator.py ================================================ from collections.abc import Iterator from dataclasses import dataclass from typing import ( Dict, List, Union, ) @dataclass class MenuItem: name: str description: str vegetarian: bool price: float class DinnerMenuIterator(Iterator): # Just for demonstration purposes! def __init__(self, collection: List[MenuItem]): self._collection = collection self._position = 0 def __next__(self) -> MenuItem: try: value = self._collection[self._position] self._position += 1 except IndexError: raise StopIteration() return value class DinnerMenu: # Just for demonstration purposes! menu = [ MenuItem("Vegetarian BLT", "Fake Bacon with lettuce on whole wheat", True, 2.99), MenuItem("BLT", "Bacon with lettuce on whole wheat", False, 2.99), MenuItem("Soup of the day", "Soup of the day, with a side of potato salad", False, 3.99), MenuItem("HotDog", "A Hot Dog with sauerkraut, relish, onions, topped with cheese", False, 3.05), ] def __iter__(self) -> DinnerMenuIterator: # Factory Method return DinnerMenuIterator(self.menu) class BreakfastMenuIterator(Iterator): # Just for demonstration purposes! def __init__(self, collection: Dict[str, MenuItem]): self._collection = collection self._position = 0 def __next__(self) -> MenuItem: try: value = list(self._collection.values())[self._position] self._position += 1 except IndexError: raise StopIteration() return value class BreakfastMenu: # Just for demonstration purposes! 
menu = { "K&B's Pancake Breakfast": MenuItem("K&B's Pancake Breakfast", "Pancakes with scrambled eggs and toast", True, 2.99), "Regular Pancake Breakfast": MenuItem("Regular Pancake Breakfast", "Pancakes with fried eggs, sausage", False, 2.99), "Blueberry Pancakes": MenuItem("Blueberry Pancakes", "Pancakes made with fresh blueberries", True, 3.49), } def __iter__(self) -> BreakfastMenuIterator: # Factory Method return BreakfastMenuIterator(self.menu) class Waitress: def __init__(self, pancake_menu: BreakfastMenu, dinner_menu: DinnerMenu): self._pancake_menu = pancake_menu self._dinner_menu = dinner_menu def print_menu(self): print("BREAKFAST") self._print_menu(self._pancake_menu) print("DINNER") self._print_menu(self._dinner_menu) @staticmethod def _print_menu(menu: Union[BreakfastMenu, DinnerMenu]): for menu_item in menu: print(f"{menu_item.name}, {menu_item.price}, {menu_item.description}") Waitress(BreakfastMenu(), DinnerMenu()).print_menu() ================================================ FILE: books/head-first-design-patterns/ch_10_state.py ================================================ from __future__ import annotations from random import random class State: def __init__(self, gumball_machine: GumballMachine): self._gumball_machine = gumball_machine def insert_quarter(self) -> None: pass def eject_quarter(self) -> None: pass def turn_crank(self) -> None: pass def dispense(self) -> None: pass class NoQuarterState(State): def insert_quarter(self) -> None: print("You inserted a quarter") self._gumball_machine.state = self._gumball_machine.has_quarter_state class HasQuarterState(State): def eject_quarter(self) -> None: print("Quarter returned") self._gumball_machine.state = self._gumball_machine.no_quarter_state def turn_crank(self) -> None: print("You turned...") if random() < 0.1 and self._gumball_machine.count > 1: self._gumball_machine.state = self._gumball_machine.winner_state else: self._gumball_machine.state = self._gumball_machine.sold_state class SoldState(State): def dispense(self) -> None: self._gumball_machine.release_ball() if self._gumball_machine.count > 0: self._gumball_machine.state = self._gumball_machine.no_quarter_state else: print("Out of gumballs!") self._gumball_machine.state = self._gumball_machine.sold_out_state class SoldOutState(State): pass class WinnerState(State): def dispense(self) -> None: self._gumball_machine.release_ball() if self._gumball_machine.count == 0: self._gumball_machine.state = self._gumball_machine.sold_out_state else: self._gumball_machine.release_ball() print("You are a WINNER!") if self._gumball_machine.count > 0: self._gumball_machine.state = self._gumball_machine.no_quarter_state else: print("Out of gumballs!") self._gumball_machine.state = self._gumball_machine.sold_out_state class GumballMachine: def __init__(self, count: int): self.count = count self.no_quarter_state = NoQuarterState(self) self.has_quarter_state = HasQuarterState(self) self.sold_state = SoldState(self) self.sold_out_state = SoldOutState(self) self.winner_state = WinnerState(self) self.state = self.no_quarter_state if count > 0 else self.sold_out_state def insert_quarter(self) -> None: self.state.insert_quarter() def eject_quarter(self) -> None: self.state.eject_quarter() def turn_crank(self) -> None: self.state.turn_crank() self.state.dispense() def release_ball(self) -> None: print("A ball rolling out the slot...") if self.count > 0: self.count = self.count - 1 machine = GumballMachine(5) machine.insert_quarter() machine.turn_crank() machine.insert_quarter() 
machine.turn_crank() machine.insert_quarter() machine.turn_crank() ================================================ FILE: books/head-first-design-patterns/ch_11_virtual_proxy.py ================================================ class Icon: @property def width(self) -> int: raise NotImplementedError @property def height(self) -> int: raise NotImplementedError def paint_icon(self) -> None: raise NotImplementedError class ImageIcon(Icon): @property def width(self) -> int: return 1280 @property def height(self) -> int: return 720 def paint_icon(self) -> None: print(":)") class ImageProxy(Icon): def __init__(self, url: str): self._image_icon = None self._url = url # Following 'if' statements can be reworked to use The State Pattern: ImageNotLoaded and ImageLoaded @property def width(self) -> int: return self._image_icon.width if self._image_icon else 600 @property def height(self) -> int: return self._image_icon.height if self._image_icon else 800 def paint_icon(self) -> None: if not self._image_icon: # Download image from the internet print(f"Downloading the image from '{self._url}'") self._image_icon = ImageIcon() self._image_icon.paint_icon() image = ImageProxy("whatever://image") image.paint_icon() ================================================ FILE: books/head-first-design-patterns/notes.md ================================================ [go back](https://github.com/pkardas/learning) # Head First Design Patterns: Building Extensible and Maintainable Object-Oriented Software Book by Eric Freeman and Elisabeth Robson Code here: [click](.) - [Chapter 1: The Strategy Pattern - Welcome to Design Patterns](#chapter-1-welcome-to-design-patterns) - [Chapter 2: The Observer Pattern - Keeping your Objects in the Know](#chapter-2-keeping-your-objects-in-the-know) - [Chapter 3: The Decorator Pattern - Decorating Objects](#chapter-3-decorating-objects) - [Chapter 4: The Factory Pattern - Baking with OO Goodness](#chapter-4-baking-with-oo-goodness) - [Chapter 5: The Singleton Pattern - One-of-a-kind Objects](#chapter-5-one-of-a-kind-objects) - [Chapter 6: The Command Pattern - Encapsulating Invocation](#chapter-6-encapsulating-invocation) - [Chapter 7: The Adapter and Facade Patterns - Being Adaptive](#chapter-7-being-adaptive) - [Chapter 8: The Template Method Pattern - Encapsulating Algorithms](#chapter-8-encapsulating-algorithms) - [Chapter 9: The Iterator and Composite Patterns - Well-Managed Collections](#chapter-9-well-managed-collections) - [Chapter 10: The State Pattern - The State of Things](#chapter-10-the-state-of-things) - [Chapter 11: The Proxy Pattern - Controlling Object Access](#chapter-11-controlling-object-access) - [Chapter 12: Compound patterns - Patterns of Patterns](#chapter-12-patterns-of-patterns) - [Chapter 13: Patterns in the Real World](#chapter-13-patterns-in-the-real-world) - [Chapter 14: Appendix - Leftover Patterns](#chapter-14-leftover-patterns) ## Chapter 1: Welcome to Design Patterns [The Strategy Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_01_strategy.py) Someone has already solved your problems. You can exploit the wisdom and lessons learned by other developers who have been down the same design problems road and survived the trip. Instead of code reuse, with patterns you get experience reuse. Example with ducks, adding `fly` method to the `Duck` superclass turned out to introduce a bug to the `RubberDuck` subclass. 
*Which of the following are disadvantages of using inheritance to provide Duck behaviour?*

- My answer: [D] It is hard to gain knowledge of all duck behaviours. [F] Changes can unintentionally affect other ducks.

*What do YOU think about the design? What would you do if you were Joe?*

- My answer: New features would require adding many interfaces, for example an interface for migrating birds. Maybe instead it would be better to have 2 types of ducks, living and non-living, and instead of introducing a single class per duck, reuse classes and make them parametrised with a name.

There is one constant in software development. What is the one thing you can always count on in software development? **CHANGE**. No matter how well you design an application, over time an application must grow and change, or it will die.

*List some reasons you have had to change code in your application*:

- New definition of the operations process.
- Better understanding of the domain.
- Requirement to use a worker instead of a lambda.
- New library for JSON serialisation.

We know using inheritance hasn't worked out very well. The `Flyable` and `Quackable` interfaces sounded good at first. There is a design principle:

> Identify the aspects of your application that vary and separate them from what stays the same.

Another way to think about this principle: *take the parts that vary and encapsulate them, so that later you can alter or extend the parts that vary without affecting those that don't*.

We know that `fly` and `quack` are the parts of the Duck class that vary across ducks. We pull these methods out of the Duck class and create a new set of classes to represent each behaviour (FlyBehaviour, QuackBehaviour, ...). That way, the Duck classes won't need to know any of the implementation details of their own behaviours.

> Program to an interface, not an implementation. == Program to a supertype.

Programming to an implementation:

```java
Dog d = new Dog();  // a concrete implementation of Animal
d.bark();
```

Programming to an interface/supertype:

```java
Animal animal = new Dog();  // we know it is a Dog, but we can now use the animal reference polymorphically
animal.makeSound();
```

*Using our new design, what would you do if you needed to add rocket-powered flying to the SimUDuck app?*

- My answer: Add a new implementation of `FlyBehaviour`.

*Can you think of a class that might want to use the Quack behaviour that isn't a duck?*

- My answer: A Russian quacking machine.

A Duck will now delegate its flying and quacking behaviours, instead of using quacking and flying methods defined in the Duck class. To change a duck's behaviour at runtime, just call the duck's setter method for that behaviour.

Design principle:

> Favour composition over inheritance.

Creating systems using composition gives you a lot more flexibility. Not only does it let you encapsulate a family of algorithms into their own set of classes, but it also lets you change behaviour at runtime. Composition is used in many design patterns, and you will see a lot more about its advantages and disadvantages throughout the book.

*A duck call is a device that hunters use to mimic the calls (quacks) of ducks. How would you implement your own duck call that does not inherit from the Duck class?*

- My answer: Compose a duck call of `QuackBehaviour`.

I have just applied the **STRATEGY** pattern.

**The Strategy Pattern** - defines a family of algorithms, encapsulates each one, and makes them interchangeable. Strategy lets the algorithm vary independently of the clients that use it.
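A minimal sketch of the resulting design (behaviour and class names follow the chapter's duck example; the Python details are my own):

```python
class FlyWithWings:
    def fly(self) -> None:
        print("I'm flying!")


class FlyNoWay:
    def fly(self) -> None:
        print("I can't fly")


class Duck:
    def __init__(self, fly_behaviour) -> None:
        self.fly_behaviour = fly_behaviour  # composed, not inherited

    def perform_fly(self) -> None:
        self.fly_behaviour.fly()  # delegate to the encapsulated strategy


mallard = Duck(FlyWithWings())
mallard.perform_fly()               # -> I'm flying!
mallard.fly_behaviour = FlyNoWay()  # behaviour swapped at runtime
mallard.perform_fly()               # -> I can't fly
```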
Design puzzle:

- *KnifeBehaviour, BowAndArrowBehaviour, AxeBehaviour, SwordBehaviour* IMPLEMENT *WeaponBehaviour*
- *Troll, Queen, King, Knight* EXTENDS *Character*
- *Character* HAS-A *WeaponBehaviour*
- `setWeapon` should be in the *Character* class

Design Patterns give you a shared vocabulary with other developers. Once you have got the vocabulary, you can more easily communicate with other developers and inspire those who don't know patterns to start learning them. It also elevates your thinking about architectures by letting you think at the pattern level, not the nitty-gritty object level.

The power of a shared pattern vocabulary:

- Shared pattern vocabularies are POWERFUL. When you communicate with another developer using patterns, you are communicating not just a pattern name but a whole set of qualities, characteristics and constraints that the pattern represents.
- Patterns allow you to say more with less. Other developers can quickly know precisely the design you have in mind.
- Talking at the pattern level allows you to stay *in the design* longer, without having to dive deep down to the nitty-gritty details of implementing objects and classes.
- Shared vocabularies can turbo-charge your team. A team well versed in design patterns can move quickly with less room for misunderstanding.
- Shared vocabularies encourage more junior developers to get up to speed.

Design patterns don't go directly into your code, they first go into your **brain**. Once you have loaded your brain with a good working knowledge of patterns, you can then start to apply them to new designs, and rework your old code when you find it is degrading into an inflexible mess.

OO Basics: Abstraction, Encapsulation, Polymorphism, Inheritance

OO Principles: Encapsulate what varies. Favour composition over inheritance. Program to interfaces, not implementations.

Bullet points:

- Knowing the OO basics does not make you a good OO designer.
- Good OO designs are reusable, extensible and maintainable.
- Patterns show you how to build systems with good OO design qualities.
- Patterns are proven OO experience.
- Patterns don't give you code, they give you general solutions to design problems. You apply them to your specific application.
- Patterns aren't invented, they are discovered.
- Most patterns and principles address issues of change in software.
- Most patterns allow some part of a system to vary independently of all other parts.
- We often try to take what varies in a system and encapsulate it.
- Patterns provide a language that can maximise the value of your communication with other developers.

## Chapter 2: Keeping your Objects in the Know

[The Observer Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_02_observer.py)

Observer Pattern: a pattern that keeps your objects in the know when something they care about happens.

Weather-O-Rama: our task is to create an app that uses the WeatherData object to update 3 displays - current conditions, weather stats and a forecast.

*Based on our first implementation, which of the following apply?*

- My answers: [A] We are coding to concrete implementations, not interfaces. [B] For every new display we will need to alter this code. [C] We have no way to add or remove display elements at runtime. [E] We haven't encapsulated the part that changes.
You know how newspaper or magazine subscriptions work:

1. A newspaper publisher goes into business and begins publishing newspapers.
2. You subscribe to a particular publisher, and every time there is a new edition it gets delivered to you. As long as you remain a subscriber, you get new newspapers.
3. You unsubscribe when you don't want papers anymore, and they stop being delivered.
4. While the publisher remains in business, people, hotels, airlines and other businesses constantly subscribe and unsubscribe to the newspaper.

> Publishers + Subscribers = Observer Pattern

The Observer Pattern:

> Defines a one-to-many dependency between objects so that when one object changes state, all of its dependents are notified and updated automatically.

There are a few different ways to implement the Observer Pattern, but most revolve around a class design that includes Subject and Observer interfaces. Because the subject is the sole owner of the data, the observers are dependent on the subject to update them when the data changes. This leads to a cleaner OO design than allowing many objects to control the same data.

**We say an object is tightly coupled to another object when it is too dependent on that object.** A loosely coupled object doesn't know or care too much about the details of another object. By not knowing too much about other objects, we can create designs that can handle change better. The Observer Pattern is a great example of loose coupling. The ways the pattern achieves loose coupling:

1. The only thing the subject knows about an observer is that it implements a certain interface.
2. We can add new observers at any time.
3. We never need to modify the subject to add new types of observers.
4. We can reuse subjects or observers independently of each other.
5. Changes to either the subject or an observer will not affect the other.

Design principle:

> Strive for loosely coupled designs between objects that interact.

Loosely coupled designs allow us to build flexible systems that can handle change because they minimise the interdependency between objects.

The Observer Pattern is one of the most common patterns in use, and you will find plenty of examples of the pattern being used in many libraries and frameworks (Swing, JavaBeans, Cocoa, ...). Listener == Observer Pattern. The Observer Pattern can be used for sending "notifications" so that observers can pull the data on their own.

Bullet points:

- The Observer Pattern defines a one-to-many relationship between objects.
- Subjects update Observers using a common interface.
- Observers of any concrete type can participate in the pattern as long as they implement the Observer interface.
- Observers are loosely coupled in that the Subject knows nothing about them, other than that they implement the Observer interface.
- You can push or pull data from the Subject when using the pattern (pull is considered more correct).
- Swing makes heavy use of the Observer Pattern, as do many GUI frameworks.
- You will also find the pattern in many other places, including RxJava, JavaBeans and RMI, as well as in other language frameworks, like Cocoa, Swift and JavaScript events.
- The Observer Pattern is related to the Publish/Subscribe Pattern, which is for more complex situations with multiple Subjects and/or multiple message types.
- The Observer Pattern is a commonly used pattern, and we will see it again when we learn about Model-View-Controller.

*For each design principle, describe how the Observer Pattern makes use of the principle:*

- Identify the aspects of your application that vary and separate them from what stays the same: Observers and data vary.
- Program to an interface, not an implementation: Subject and Observers are loosely coupled because what they know about each other are the interfaces they implement.
- Favour composition over inheritance: the Subject holds a list of observers, observers hold a reference to the subject.
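A minimal Observer sketch along the lines of the chapter's WeatherData example (the implementation details here are my own, not the book's exact code):

```python
class WeatherData:  # the Subject - sole owner of the data
    def __init__(self) -> None:
        self._observers = []
        self.temperature = 0.0

    def register_observer(self, observer) -> None:
        self._observers.append(observer)

    def set_temperature(self, temperature: float) -> None:
        self.temperature = temperature
        for observer in self._observers:  # notify everyone on state change
            observer.update(self)


class CurrentConditionsDisplay:  # an Observer - only the interface matters
    def update(self, subject: WeatherData) -> None:
        print(f"Current temperature: {subject.temperature}")


weather_data = WeatherData()
weather_data.register_observer(CurrentConditionsDisplay())
weather_data.set_temperature(21.5)  # -> Current temperature: 21.5
```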
## Chapter 3: Decorating Objects

[The Decorator Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_03_decorator.py)

We will re-examine the typical overuse of inheritance, and we will learn how to decorate classes at runtime using a form of object composition.

The Starbuzz system has created a maintenance nightmare for its developers. They are violating "*identify the aspects of your application that vary and separate them from what stays the same*" and "*favour composition over inheritance*".

Problems with the suggested design:

- My answer: What if a customer has a promo coupon, e.g. -20%? What if a condiment is not available?

If I can extend an object's behaviour through composition, then I can do this dynamically at runtime. When I inherit by subclassing, that behaviour is set statically at compile time. By dynamically composing objects, I can add new functionality by writing new code, rather than altering existing code. Because I am not changing existing code, the chances of introducing bugs or causing unintended side effects in pre-existing code are much reduced.

Code should be closed to change, yet open to extension. Design principle - one of the most important design principles:

> Classes should be open for extension, but closed for modification.

OPEN - if needs or requirements change, just go and make your own extensions. CLOSED - we spent a lot of time getting this code correct and bug-free, so we can't let you alter the existing code. It must remain closed to modification.

Our goal is to allow classes to be easily extended to incorporate new behaviour without modifying existing code: designs that are resilient to change and flexible enough to take on new functionality to meet changing requirements. E.g. the Observer Pattern - we can add new Observers and extend the Subject at any time. Many of the patterns give us time-tested designs that protect your code from being modified by supplying a means of extension.

How can I make every part of my design follow the Open-Closed Principle? Usually you can't. Making an OO design flexible and open to extension without modifying existing code takes time and effort. Applying the Open-Closed Principle EVERYWHERE is wasteful and unnecessary, and can lead to complex, hard-to-understand code.

The Decorator Pattern:

> Attaches additional responsibilities to an object dynamically. Decorators provide a flexible alternative to subclassing for extending functionality.

The decorator adds its own behaviour before and/or after delegating to the object it decorates to do the rest of the job.

Just because we are subclassing, it doesn't mean we use inheritance. Sometimes we are subclassing in order to have the correct type, not to inherit the behaviour. We can acquire new behaviour not by inheriting it from a superclass, but by composing objects together.

Decorators are typically created using other patterns like Factory and Builder. `java.io` is largely based on Decorator. Java I/O also points out one of the downsides of the Decorator Pattern: designs using this pattern often result in a large number of small classes, which can be overwhelming to the developer trying to use the Decorator-based API.
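A minimal Decorator sketch in the spirit of the Starbuzz example (class names are borrowed from the chapter; the Python rendering is my own):

```python
class Beverage:
    def cost(self) -> float:
        raise NotImplementedError


class Espresso(Beverage):
    def cost(self) -> float:
        return 1.99


class CondimentDecorator(Beverage):  # same type as what it wraps
    def __init__(self, beverage: Beverage) -> None:
        self._beverage = beverage  # the wrapped component


class Mocha(CondimentDecorator):
    def cost(self) -> float:
        return 0.20 + self._beverage.cost()  # add behaviour, then delegate


# Decorators mirror the component type, so they stack freely:
drink = Mocha(Mocha(Espresso()))
print(drink.cost())  # -> 2.39 (modulo float rounding)
```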
Bullet points:

- Inheritance is one form of extension, but not necessarily the best way to achieve flexibility in our designs.
- In our designs we should allow behaviour to be extended without the need to modify existing code.
- Composition and delegation are often used to add new behaviours at runtime.
- The Decorator Pattern provides an alternative to subclassing for extending behaviour.
- The Decorator Pattern involves a set of decorator classes that are used to wrap concrete components.
- Decorator classes mirror the type of the components they decorate (in fact, they are the same type as the components they decorate, either through inheritance or interface implementation).
- Decorators change the behaviour of their components by adding new functionality before and/or after method calls to the component.
- You can wrap a component with any number of decorators.
- Decorators are typically transparent to the client of the component - that is, unless the client is relying on the component's concrete type.
- Decorators can result in many small objects in our design, and overuse can be complex.

## Chapter 4: Baking with OO Goodness

[The Factory Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_04_factory.py)

There is more to making objects than just using the *new* operator. We will learn that instantiation is an activity that shouldn't always be done in public and can often lead to coupling problems. The Factory Pattern can save us from embarrassing dependencies.

We should not program to an implementation, but every time we use *new* that is exactly what we do. The *new* operator instantiates a concrete class, so that is definitely an implementation, not an interface. CHANGE impacts our use of *new*: code will have to be changed as new concrete classes are added.

*How might you take all the parts of your application that instantiate concrete classes and separate or encapsulate them from the rest of your application?*

- My answer: I would add a function returning instantiated classes.

Indeed, we can encapsulate object creation: we can take the creation code and move it into another object that is only going to be concerned with creating pizzas. Anytime the store needs a pizza, it asks the pizza factory to make one. By encapsulating object creation in one class, we have only one place to make modifications when the implementation changes.

A simple object factory can be a static method; however, it has the disadvantage that we can not subclass and change the behaviour of the create method. The Simple Factory isn't actually a Design Pattern, it is more of a programming idiom. Some developers do mistake this idiom for the Factory Pattern.

A *factory method* handles object creation and encapsulates it in a subclass. This decouples the client code (e.g. `orderPizza`) in the superclass from the object creation code in the subclass.

```java
public abstract class PizzaStore {
    public Pizza orderPizza(String type) {
        Pizza pizza = createPizza(type);
        pizza.prepare();
        pizza.bake();
        pizza.cut();
        pizza.box();
        return pizza;
    }

    protected abstract Pizza createPizza(String type);
}
```

All factory patterns encapsulate object creation.
The Factory Method Pattern encapsulates object creation by letting subclasses decide what objects to create. For every concrete Creator, there is typically a whole set of products that it creates. Chicago pizza creators create different types of Chicago-style pizza, New York pizza creators create different types of New York-style pizza, and so on.

The Factory Method Pattern:

> Defines an interface for creating an object, but lets subclasses decide which class to instantiate. Factory Method lets a class defer instantiation to subclasses.

The Creator is written to operate on products produced by the factory method. The Creator class is written without knowledge of the actual products that will be created. Only subclasses actually implement the factory method and create products.

When you directly instantiate an object, you are depending on its concrete class. Reducing dependencies on concrete classes in our code is a "good thing".

General principle - the Dependency Inversion Principle:

> Depend upon abstractions. Do not depend upon concrete classes.

It suggests that our high-level components should not depend on our low-level components; rather, they should both depend on abstractions. The "inversion" in the name Dependency Inversion Principle is there because it inverts the way you typically might think about your OO design: low-level components now depend on a higher-level abstraction.

Guidelines that can help to avoid OO designs that violate the Dependency Inversion Principle:

- No variable should hold a reference to a concrete class (if you use new, you will be holding a reference; use a factory instead).
- No class should derive from a concrete class (if you derive, you depend; derive from an abstraction).
- No method should override an implemented method of its base classes (if you override an implemented method, your base wasn't really an abstraction to start with).

This is a guideline you should strive for, rather than a rule you should follow all the time. Clearly, every single Java program ever written violates these guidelines. But if you internalise these guidelines and have them in the back of your mind when you design, you will know when you are violating the principle, and you will have a good reason for doing so.

An Abstract Factory gives us an interface for creating a family of products. By writing code that uses this interface, we decouple our code from the actual factory that creates the products. That allows us to implement a variety of factories that produce products meant for different contexts - such as different regions, operating systems, or different look and feels. Because the code is decoupled from the actual products, we can substitute different factories to get different behaviours.

The Abstract Factory Pattern:

> Provides an interface for creating families of related or dependent objects without specifying their concrete classes.

Often the methods of an Abstract Factory are implemented as factory methods. The Factory Method and the Abstract Factory are both good at decoupling applications from specific implementations.

- Use Abstract Factory whenever you have families of products you need to create, and you need to make sure your clients create products that belong together. Abstract Factory creates objects through object composition.
- Use Factory Method to decouple client code from the concrete classes you need to instantiate, or if you don't know ahead of time all the concrete classes you are going to need. Factory Method creates objects through inheritance.
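A minimal Abstract Factory sketch for the pizza-ingredient idea (the ingredient strings and class names are assumptions based on the chapter's Java example):

```python
class PizzaIngredientFactory:  # the abstract factory interface
    def create_dough(self) -> str:
        raise NotImplementedError

    def create_sauce(self) -> str:
        raise NotImplementedError


class NYPizzaIngredientFactory(PizzaIngredientFactory):
    def create_dough(self) -> str:
        return "thin crust dough"

    def create_sauce(self) -> str:
        return "marinara sauce"


class ChicagoPizzaIngredientFactory(PizzaIngredientFactory):
    def create_dough(self) -> str:
        return "extra thick crust dough"

    def create_sauce(self) -> str:
        return "plum tomato sauce"


def make_pizza(factory: PizzaIngredientFactory) -> None:
    # The client depends only on the abstract interface, so a whole
    # family of products can be swapped at once.
    print(f"Pizza with {factory.create_dough()} and {factory.create_sauce()}")


make_pizza(NYPizzaIngredientFactory())
make_pizza(ChicagoPizzaIngredientFactory())
```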
Bullet points:

- All factories encapsulate object creation.
- Simple Factory, while not a bona fide design pattern, is a simple way to decouple your clients from concrete classes.
- Factory Method relies on inheritance: object creation is delegated to subclasses, which implement the factory method to create objects.
- Abstract Factory relies on object composition: object creation is implemented in methods exposed in the factory interface.
- All factory patterns promote loose coupling by reducing the dependency of your application on concrete classes.
- The intent of Factory Method is to allow a class to defer instantiation to its subclasses.
- The intent of Abstract Factory is to create families of related objects without having to depend on their concrete classes.
- The Dependency Inversion Principle guides us to avoid dependencies on concrete types and to strive for abstractions.
- Factories are a powerful technique for coding to abstractions, not concrete classes.

## Chapter 5: One-of-a-kind Objects

[The Singleton Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_05_singleton.py)

The ticket to creating one-of-a-kind objects for which there is only one instance, ever. By using a singleton you can ensure that every object in your application is making use of the same global resource. Often used to manage pools of resources, like connection or thread pools.

_How might things go wrong if more than one instance of ChocolateBoiler is created in an application?_

- My answer: Incorrect state management, because of multiple instances.

The Singleton Pattern:

> Ensures a class has only one instance, and provides a global point of access to it.

Despite using the Singleton Pattern, a multithreaded application can still cause problems - multiple objects may get instantiated. In Java, the solution for this is the `synchronized` keyword:

```java
public static synchronized Singleton getInstance() {
    ...
}
```

`synchronized` forces every thread to wait for its turn before it can enter the method. That is, no 2 threads may enter the method at the same time. Synchronization may be expensive, but here it matters only once, on `uniqueInstance` initialization - after the first time, synchronization is totally unneeded overhead. There are Java-specific solutions to this overhead (e.g. double-checked locking).

The Singleton Pattern violates "_the loose coupling principle_": if you make a change to the Singleton, you will likely have to make a change to every object connected to it.

A global variable can provide global access, but does not ensure only one instance. Global variables also tend to encourage developers to pollute the namespace with lots of global references to small objects. Singletons don't encourage this in the same way, but can be abused nonetheless. It is possible to implement a Singleton as an enum.

Bullet points:

- The Singleton Pattern ensures you have at most one instance of a class in your application.
- The Singleton Pattern also provides a global access point to that instance.
- Java's implementation of the Singleton Pattern makes use of a private constructor and a static method combined with a static variable.
- Examine your performance and resource constraints and carefully choose an appropriate Singleton for multithreaded applications (we should consider all applications multithreaded).
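For reference, a minimal thread-safe Singleton sketch in Python - a rough equivalent of the `synchronized getInstance()` above (my own adaptation, not the book's code):

```python
import threading


class ChocolateBoiler:
    _instance = None
    _lock = threading.Lock()

    @classmethod
    def get_instance(cls) -> "ChocolateBoiler":
        with cls._lock:  # no two threads may enter at the same time
            if cls._instance is None:
                cls._instance = cls()
        return cls._instance


# Every caller gets the same shared instance:
assert ChocolateBoiler.get_instance() is ChocolateBoiler.get_instance()
```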
## Chapter 6: Encapsulating Invocation

[The Command Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_06_command.py)

In this chapter we are going to encapsulate method invocation. By encapsulating method invocation, we can crystallize pieces of computation so that the object invoking the computation doesn't need to worry about how to do things, it just uses our crystallized method to get it done.

The Command Pattern allows you to decouple the requester of an action from the object that actually performs the action. This can be achieved by introducing command objects into the design. A command object encapsulates a request to do something on a specific object.

Example with a waitress taking orders and passing them to a cook - separation of the object making a request from the object that receives and executes the request:

- Customer - Client
- Order - Command
- Waitress - Invoker
- Short-Order Cook - Receiver
- takeOrder - setCommand - sets what is supposed to be executed
- orderUp - execute

The Command Pattern:

> Encapsulates a request as an object, thereby letting you parametrize other objects with different requests, queue or log requests, and support undoable operations.

A null object is useful when you don't have a meaningful object to return, and yet you want to remove the responsibility of handling null from the client, e.g. `NoCommand` - a surrogate that does nothing when its execute method is called.

The Command Pattern can be taken to the next level by using e.g. Java's lambda expressions. Instead of instantiating the concrete command objects, you can use function objects in their place. This can be done if the Command interface has one abstract method.

In order to support undoable Commands, the `Command` interface has to be extended with an `undo` method. A `MacroCommand` can be used to execute multiple commands:

```java
MacroCommand partyOnMacro = new MacroCommand(new Command[]{lightOn, stereoOn, tvOn, hottubOn});
```

More uses of the Command Pattern:

- Queueing requests - objects implementing the command interface are added to the queue; threads remove commands from the queue one by one and call their `execute` method. Once complete, they go back for a new command object. This gives us an effective way to limit computation to a fixed number of threads.
- Logging requests - the semantics of some applications require that we log all actions and be able to recover after a crash by re-invoking those actions. The Command Pattern can support these semantics with the addition of two methods: `store` and `load`.

Bullet points:

- The Command Pattern decouples an object making a request from the one that knows how to perform it.
- A Command object is at the center of this decoupling and encapsulates a receiver with an action (or set of actions).
- An invoker makes a request of a Command object by calling its execute method, which invokes those actions on the receiver.
- Invokers can be parametrized with Commands, even dynamically at runtime.
- Commands may support undo by implementing an undo method that restores the object to its previous state before the execute method was last called.
- MacroCommands are a simple extension of the Command Pattern that allow multiple commands to be invoked. Likewise, MacroCommands can easily support undo.
- In practice, it is not uncommon for "smart" Command objects to implement the request themselves rather than delegating to a receiver.
- Commands may also be used to implement logging and transactional systems.
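A minimal Command sketch with undo support (the Light/RemoteControl names follow the chapter's running example; the details are my own):

```python
class Light:  # the Receiver - knows how to do the work
    def on(self) -> None:
        print("Light is on")

    def off(self) -> None:
        print("Light is off")


class LightOnCommand:  # a Command - encapsulates a request on a receiver
    def __init__(self, light: Light) -> None:
        self._light = light

    def execute(self) -> None:
        self._light.on()

    def undo(self) -> None:  # restores the state before execute
        self._light.off()


class RemoteControl:  # the Invoker - knows nothing about receivers
    def __init__(self) -> None:
        self._command = None

    def set_command(self, command) -> None:
        self._command = command

    def press_button(self) -> None:
        self._command.execute()


remote = RemoteControl()
remote.set_command(LightOnCommand(Light()))
remote.press_button()  # -> Light is on
```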
## Chapter 7: Being Adaptive

[The Adapter Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_07_adapter.py)

[The Facade Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_07_facade.py)

We are going to wrap some objects with a different purpose: to make their interfaces look like something they are not. So we can adapt a design expecting one interface to a class that implements a different interface. Also, we are going to look at another pattern that wraps objects to simplify their interface.

You will have no trouble understanding what an OO adapter is, because the real world is full of them (e.g. a power adapter: the British wall outlet exposes one interface for getting power, the adapter converts one interface into another, and the US laptop expects another interface). OO adapters play the same role as their real-world counterparts: they take an interface and adapt it to one that a client is expecting. For example: you are going to use a new library, but the new vendor designed their interfaces differently than the last vendor. The adapter acts as the middleman by receiving requests from the client and converting them into requests that make sense on the vendor classes.

_If it walks like a duck and quacks like a duck, then it ~~must~~ might be a ~~duck~~ turkey wrapped with a duck adapter..._

```java
public class TurkeyAdapter implements Duck {
    // take a Turkey in the constructor, implement Duck's methods by invoking Turkey's methods
}
```

How the Client uses the Adapter:

1. The client makes a request to the adapter by calling a method on it using the target interface.
2. The adapter translates the request into one or more calls on the adaptee using the adaptee interface.
3. The client receives the results of the call and never knows there is an adapter doing the translation.

It is possible to create a Two Way Adapter - just implement both interfaces involved, so the adapter can act as the old interface or the new interface.

The Adapter Pattern:

> Converts the interface of a class into another interface the client expects. Adapter lets classes work together that couldn't otherwise because of incompatible interfaces.

The Adapter is used to decouple the client from the implemented interface, and if we expect the interface to change over time, the adapter encapsulates that change so that the client doesn't have to be modified each time it needs to operate against a different interface. The Adapter Pattern is full of good OO design principles: it uses object composition and binds the client to an interface, not an implementation.

There is a second type of adapter - the class adapter; this one uses multiple inheritance (Target and Adaptee).

Real-world adapters:

- [Java] Enumerators - the Enumeration interface allows you to step through the elements of a collection without knowing the specifics of how they are managed in the collection.
- [Java] Iterators - the more recent Collection classes use an Iterator interface, which allows you to iterate through a set of items in a collection, and adds the ability to remove items.

When a method in an adapter can not be supported, you can throw e.g. `UnsupportedOperationException`.
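A minimal object-adapter sketch mirroring the TurkeyAdapter comment above (the Python rendering is my own):

```python
class Turkey:  # the Adaptee - has the "wrong" interface
    def gobble(self) -> None:
        print("Gobble gobble")


class TurkeyAdapter:  # exposes the Duck interface the client expects
    def __init__(self, turkey: Turkey) -> None:
        self._turkey = turkey  # object composition, not inheritance

    def quack(self) -> None:
        self._turkey.gobble()  # translate the request onto the adaptee


def client_code(duck_like) -> None:
    duck_like.quack()  # the client never knows an adapter is involved


client_code(TurkeyAdapter(Turkey()))  # -> Gobble gobble
```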
_Some AC adapters do more than just change the interface - they add other features like surge protection, indicator lights, and other bells and whistles. If you were going to implement these kinds of features, what pattern would you use?_

- My answer: The Decorator Pattern

Decorator vs Adapter:

- Decorators allow new behavior to be added to classes without altering existing code.
- Adapters always convert the interface of what they wrap.

Decorators and Adapters may look somewhat similar on paper, but clearly they are miles apart.

The Facade Pattern alters an interface, but in order to simplify the interface - it hides all the complexity of one or more classes behind a clean, well-lit facade. The Facade Pattern can take a complex subsystem and make it easier to use. Example - a home cinema system: instead of turning on the popcorn machine, the screen and the audio system, all you need to do is call `watchMovie`.

Facades don't encapsulate the subsystem classes, they merely provide a simplified interface to their functionality. The subsystem classes still remain available. A facade provides a simplified interface while still exposing the full functionality of the system to those who may need it. A facade not only simplifies an interface, it decouples a client from a subsystem of components. Facades and adapters may wrap multiple classes, but a facade's intent is to simplify, while an adapter's is to convert the interface to something different.

The Facade Pattern:

> Provides a unified interface to a set of interfaces in a subsystem. Facade defines a higher-level interface that makes the subsystem easier to use.

Design principle - the Principle of Least Knowledge (the Law of Demeter):

> Talk only to your immediate friends.

This principle guides us to reduce the interactions between objects to just a few close "friends". It means that when you are designing a system, for any object, be careful of the number of classes it interacts with and also how it comes to interact with those classes. This principle prevents us from creating designs that have a large number of classes coupled together, so that changes in one part of the system cascade to other parts. This means: invoke only methods that belong to:

- the object itself
- objects passed in as a parameter to the method
- any object the method creates or instantiates
- any components of the object

_Side note: Principle of Least Knowledge is a better name than the Law of Demeter, because no principle is a law, and they don't have to always be applied._

The Facade Pattern and the Principle of Least Knowledge - we try to keep subsystems adhering to the Principle of Least Knowledge as well. If this gets too complex and too many friends are intermingling, we can introduce additional facades to form layers of subsystems.

Bullet points:

- When you need to use an existing class and its interface is not the one you need, use an adapter.
- When you need to simplify and unify a large interface or a complex set of interfaces, use a facade.
- An adapter changes an interface into one a client expects.
- A facade decouples a client from a complex subsystem.
- Implementing an adapter may require little work or a great deal of work depending on the size and complexity of the target interface.
- Implementing a facade requires that we compose the facade with its subsystem and use delegation to perform the work of the facade.
- There are two forms of the Adapter Pattern: object and class adapters. Class adapters require multiple inheritance.
- You can implement more than one facade for a subsystem.
- An adapter wraps an object to change its interface, a decorator wraps an object to add new behaviours and responsibilities, and a facade "wraps" a set of objects to simplify.
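A minimal Facade sketch of the home cinema example (the subsystem classes and method names are assumptions based on the chapter's description):

```python
class PopcornPopper:  # subsystem class - still usable directly
    def on(self) -> None:
        print("Popcorn popper on")


class Projector:  # subsystem class
    def on(self) -> None:
        print("Projector on")


class HomeTheaterFacade:
    def __init__(self, popper: PopcornPopper, projector: Projector) -> None:
        self._popper = popper          # the facade is composed with
        self._projector = projector    # its subsystem and delegates

    def watch_movie(self) -> None:
        # One simple call hides the multi-step subsystem choreography.
        self._popper.on()
        self._projector.on()
        print("Movie started")


HomeTheaterFacade(PopcornPopper(), Projector()).watch_movie()
```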
## Chapter 8: Encapsulating Algorithms

[The Template Method Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_08_template_method.py)

We are going to get down to encapsulating pieces of algorithms so that subclasses can hook themselves right into a computation any time they want. We can generalize the recipe and place it in a base class.

```java
public abstract class CaffeineBeverage {
    final void prepareRecipe() {  // Our template method - it serves as a template for an algorithm.
        boilWater();
        brew();
        pourInCup();
        addCondiments();
    }

    abstract void brew();

    abstract void addCondiments();

    void boilWater() {}

    void pourInCup() {}
}
```

The Template Method defines the steps of an algorithm and allows subclasses to provide the implementation for one or more steps.

The Template Method Pattern:

> Defines the skeleton of an algorithm in a method, deferring some steps to subclasses. Template Method lets subclasses redefine certain steps of an algorithm without changing the algorithm's structure.

This pattern is all about creating a template for an algorithm. A template is just a method - a method that defines an algorithm as a set of steps. One or more of these steps is defined to be abstract and implemented by a subclass. This ensures the algorithm's structure stays unchanged. We can also have concrete methods that do nothing by default - we call them `hooks`. Subclasses are free to override these but don't have to. Use abstract methods when the subclass MUST provide an implementation of the method. Use hooks when that part of the algorithm is optional.

The Hollywood Principle:

> Don't call us, we'll call you.

The Hollywood Principle gives us a way to prevent _dependency rot_. We allow low-level components to hook themselves into a system, but the high-level components determine when they are needed, and how. In other words, the high-level components give the low-level components the "don't call us, we'll call you" treatment.

Patterns using the Hollywood Principle:

- The Template Method Pattern
- The Observer Pattern
- The Strategy Pattern
- The Factory Pattern

The Dependency Inversion Principle teaches us to avoid the use of concrete classes and instead work as much as possible with abstractions. The Hollywood Principle is a technique for building frameworks or components so that lower-level components can be hooked into the computation, but without creating dependencies between the lower- and higher-level components. The Template Method Pattern is a great design tool for creating frameworks, where the framework controls how something gets done, but leaves you to specify your own details about what is actually happening at each step of the framework's algorithm.

`sort` methods are in the spirit of the Template Method Pattern - the developer has to define the `compare` method.

Template Method vs Strategy:

- Strategy defines a family of algorithms and makes them interchangeable.
- Template Method defines the outline of an algorithm and lets subclasses do some of the work.
- Strategy uses object composition.
- Template Method uses inheritance.

Bullet points:

- A template method defines the steps of an algorithm, deferring to subclasses for the implementation of those steps.
- The Template Method Pattern gives us an important technique for code reuse.
- The template method's abstract class may define concrete methods, abstract methods and hooks.
- Abstract methods are implemented by subclasses.
- Hooks are methods that do nothing or provide default behavior in the abstract class, but may be overridden in the subclass.
- To prevent subclasses from changing the algorithm in the template method, declare the template method as final.
- The Hollywood Principle guides us to put decision making in high-level modules that can decide how and when to call low-level modules.
- You will see lots of uses of the Template Method Pattern in real-world code, but (as with any pattern) don't expect it all to be designed "by the book".
- The Strategy and Template Method Patterns both encapsulate algorithms, the first by composition and the other by inheritance.
- Factory Method is a specialisation of Template Method.
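A Python rendering of the CaffeineBeverage template method above, including an optional hook (the hook placement is my own illustration):

```python
class CaffeineBeverage:
    def prepare_recipe(self) -> None:  # the template method - fixed structure
        self.boil_water()
        self.brew()
        self.pour_in_cup()
        if self.customer_wants_condiments():  # hook controls an optional step
            self.add_condiments()

    def brew(self) -> None:
        raise NotImplementedError  # subclasses MUST implement

    def add_condiments(self) -> None:
        raise NotImplementedError

    def boil_water(self) -> None:
        print("Boiling water")

    def pour_in_cup(self) -> None:
        print("Pouring into cup")

    def customer_wants_condiments(self) -> bool:
        return True  # hook: subclasses MAY override


class Tea(CaffeineBeverage):
    def brew(self) -> None:
        print("Steeping the tea")

    def add_condiments(self) -> None:
        print("Adding lemon")


Tea().prepare_recipe()
```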
## Chapter 9: Well-Managed Collections

[The Iterator Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_09_iterator.py)

[The Composite Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_09_composite.py)

In this chapter we are going to see how we can allow our clients to iterate through objects without ever getting a peek at how we store the objects.

Iterator - encapsulates the way we iterate through a collection of objects. The Iterator Pattern relies on an interface called Iterator. In Java, the following interface does not have to be defined, because Java has a built-in Iterator interface.

```java
public interface Iterator {
    boolean hasNext();

    MenuItem next();
}
```

Once we have this interface, we can implement Iterators for any kind of collection of objects: arrays, lists, hash maps...

The Iterator Pattern:

> Provides a way to access the elements of an aggregate object sequentially without exposing its underlying representation.

The effect of using iterators in the design: once you have a uniform way of accessing the elements of all your aggregate objects, you can write polymorphic code that works with any of these aggregates. The other important impact on the design is that the Iterator Pattern gives the responsibility of traversing elements to the iterator object, not the aggregate object. This not only keeps the aggregate interface and implementation simpler, it removes the responsibility for iteration from the aggregate and keeps the aggregate focused on the things it should be focused on (managing a collection of objects), not on iteration.

The Single Responsibility Principle:

> A class should have only one reason to change.

We want to avoid change in our classes because modifying code provides all sorts of opportunities for problems to creep in. Having two ways to change increases the probability the class will change in the future, and when it does, it's going to affect two aspects of your design.

**Cohesion** - a measure of how closely a class or module supports a single purpose or responsibility.

- High cohesion - designed around a set of related functions (easy to maintain, single responsibility).
- Low cohesion - designed around a set of unrelated functions (difficult to maintain, multiple responsibilities).

There comes a time when we must refactor our code in order for it to grow. Not to do so would leave us with rigid, inflexible code that has no hope of ever sprouting new life.

The Composite Pattern:

> Allows you to compose objects into tree structures to represent part-whole hierarchies. Composite lets clients treat individual objects and compositions of objects uniformly.
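A minimal Composite sketch of a part-whole menu tree (the names are assumptions based on the chapter's menus and menu items):

```python
class MenuComponent:  # common interface for composites and leaves
    def print(self) -> None:
        raise NotImplementedError


class MenuItem(MenuComponent):  # leaf
    def __init__(self, name: str) -> None:
        self.name = name

    def print(self) -> None:
        print(f"  {self.name}")


class Menu(MenuComponent):  # composite - holds other components
    def __init__(self, name: str) -> None:
        self.name = name
        self._children = []

    def add(self, component: MenuComponent) -> None:
        self._children.append(component)

    def print(self) -> None:
        print(self.name)
        for child in self._children:  # same call works for menus and items
            child.print()


all_menus = Menu("ALL MENUS")
dinner = Menu("DINNER")
dinner.add(MenuItem("Pasta"))
all_menus.add(dinner)
all_menus.print()  # the client treats the whole tree uniformly
```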
Part-whole hierarchy - a tree of objects that is made of parts (e.g. menus and menu items). Using a composite structure, we can apply the same operations over both composites and individual objects. In other words, in most cases we can ignore the differences between compositions of objects and individual objects.

A composite contains components. Components come in two flavors: composites and leaf elements. A composite holds a set of children; those children may be other composites or leaf elements.

The Composite Pattern takes the Single Responsibility Principle and trades it for transparency - by allowing the Component interface to contain the child management operations and the leaf operations, a client can treat both composites and leaves uniformly. We are guided by design principles, but we always need to observe the effect they have on our designs.

Bullet points:

- An Iterator allows access to an aggregate's elements without exposing its internal structure.
- An Iterator takes the job of iterating over an aggregate and encapsulates it in another object.
- When using an Iterator, we relieve the aggregate of the responsibility of supporting operations for traversing its data.
- An Iterator provides a common interface for traversing the items of an aggregate, allowing you to use polymorphism when writing code that makes use of the items of the aggregate.
- The Iterable interface provides a means of getting an iterator and enables Java's enhanced for loop (for-each).
- We should strive to assign only one responsibility to each class.
- The Composite Pattern allows clients to treat composites and individual objects uniformly.
- A Component is any object in a Composite structure. Components may be other composites or leaves.
- There are many design tradeoffs in implementing Composite. You need to balance transparency and safety with your needs.

## Chapter 10: The State of Things

[The State Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_10_state.py)

The Strategy and State Patterns are twins separated at birth. The Strategy Pattern went on to create a wildly successful business around interchangeable algorithms, while State took the perhaps more noble path of helping objects to control their behavior by changing their internal state. As different as their paths became, however, underneath you will find almost precisely the same design.

The State Pattern:

> Allows an object to alter its behavior when its internal state changes. The object will appear to change its class.

The pattern encapsulates state into separate classes and delegates to the object representing the current state.

What does it mean for an object to "appear to change its class"? If an object you are using can completely change its behavior, then it appears to you that the object is actually instantiated from another class. In reality, however, you know that we are using composition to give the appearance of a class change by simply referencing different state objects.

Think of the Strategy Pattern as a flexible alternative to subclassing - if you use inheritance to define the behavior of a class, then you are stuck with that behavior even if you need to change it. With Strategy, you can change the behavior by composing with a different object.
Think of the State Pattern as an alternative to putting lots of conditionals in your context - by encapsulating the behaviors within state objects, you can simply change the state object in the context to change its behavior.

Bullet points:

- The State Pattern allows an object to have many behaviors that are based on its internal state.
- Unlike a procedural state machine, the State Pattern represents each state as a full-blown class.
- The Context gets its behavior by delegating to the current state object it is composed with.
- By encapsulating each state into a class, we localize any changes that will need to be made.
- The State and Strategy Patterns have the same class diagram, but they differ in intent.
- The Strategy Pattern typically configures Context classes with a behavior or algorithm.
- The State Pattern allows a Context to change its behavior as the state of the Context changes.
- State transitions can be controlled by the State classes or by the Context classes.
- Using the State Pattern will typically result in a greater number of classes in your design.
- State classes may be shared among Context instances.

## Chapter 11: Controlling Object Access

[The Virtual Proxy Pattern - Pattern implementation in Python](https://github.com/pkardas/learning/blob/master/books/head-first-design-patterns/ch_11_virtual_proxy.py)

Proxies control and manage access. Proxies have been known to haul entire method calls over the internet for their proxied objects - and they have also been known to patiently stand in for some pretty lazy objects.

A proxy pretends it is the real object, but it is really communicating over the net with the real object. A remote proxy acts as a local representative to a remote object. A remote object is an object that lives in the heap of a different JVM. A local representative is an object that you call local methods on and have them forwarded on to the remote object.

RMI builds the client and the service helper objects. The nice thing about RMI is that you don't have to write any of the networking or I/O code yourself. Networking and I/O methods are risky and can fail, and the client does have to acknowledge the risk. RMI nomenclature: the client helper is a "stub" and the service helper is a "skeleton".

The Proxy Pattern:

> Provides a surrogate or placeholder for another object to control access to it.

Use the Proxy Pattern to create a representative object that controls access to another object, which may be remote, expensive to create, or in need of securing.

The Proxy Pattern can manifest itself in many forms, e.g. the Virtual Proxy. A Virtual Proxy acts as a representative for an object that may be expensive to create. The Virtual Proxy often defers the creation of the object until it is needed. The Virtual Proxy also acts as a surrogate for the object before and while it is being created. After that, the proxy delegates requests to the RealSubject.

ImageProxy for an application displaying images:

1. ImageProxy first creates an ImageIcon and starts loading it from a network URL.
2. While the bytes of the image are being retrieved, ImageProxy displays "Loading album cover, please wait...".
3. When the image is fully loaded, ImageProxy delegates all method calls to the image icon.
4. If the user requests a new image, we will create a new proxy and start the process over.

There are a lot of variants of the Proxy Pattern in the real world. What they all have in common is that they intercept a method invocation that the client is making to the subject.
This level of indirection allows us to do many things, including dispatching requests to a remote subject, providing a representative for an expensive object as it is created, or providing some level of protection that can determine which clients should be calling which methods.

Protection Proxy - a proxy that controls access to an object based on access rights. For example, for an `Employee` object, a Protection Proxy might allow the employee to call certain methods on the object, a manager to call additional methods (like `setSalary`), and an HR employee to call any method on the object.

Additional proxies:

- Firewall Proxy - controls access to a set of network resources, protecting the subject from "bad" clients.
- Smart Reference Proxy - provides additional actions whenever a subject is referenced, such as counting the number of references to an object.
- Caching Proxy - provides temporary storage for results of operations that are expensive. It can also allow multiple clients to share the results to reduce computation or network latency.
- Synchronization Proxy - provides safe access to a subject from multiple threads.
- Complexity Hiding Proxy - hides the complexity of, and controls access to, a complex set of classes. This is sometimes called the Facade Proxy for obvious reasons. The Complexity Hiding Proxy differs from the Facade Pattern in that the proxy controls access, while the Facade Pattern just provides an alternative interface.
- Copy-On-Write Proxy - controls the copying of an object by deferring the copying until it is required by a client. This is a variant of the Virtual Proxy.

Bullet points:

- The Proxy Pattern provides a representative for another object in order to control the client's access to it.
- A Remote Proxy manages interaction between a client and a remote object.
- A Virtual Proxy controls access to an object that is expensive to instantiate.
- A Protection Proxy controls access to the methods of an object based on the caller.
- Many other variants of the Proxy Pattern exist, including caching proxies, firewall proxies, copy-on-write proxies, and so on.
- Proxy is structurally similar to Decorator, but the two patterns differ in their purpose.
- The Decorator Pattern adds behavior to an object, while Proxy controls access.
- Java's built-in support for Proxy can build a dynamic proxy class on demand and dispatch all calls on it to a handler of your choosing.
- Like any wrapper, proxies will increase the number of classes and objects in your designs.

## Chapter 12: Patterns of Patterns

Some of the most powerful OO designs use several patterns together. Compound patterns - a set of patterns that work together in a design that can be applied over many problems. Patterns are often used together and combined within the same design solution. A compound pattern combines two or more patterns into a solution that solves a recurring or general problem.

It is possible to rework the Duck Simulator from the first chapter using 6 patterns. In fact, you never actually want to approach a design like this. You only want to apply patterns when and where they make sense. **You never want to start out with the intention of using patterns just for the sake of it.**

MVC - it is just a few patterns put together. Music players underneath use MVC.

View - gives you a presentation of the model. The view usually gets the state and data it needs to display directly from the model.

Controller - takes user input and figures out what it means to the model.
Model - holds all the data, state and application logic. The model is oblivious to the view and controller, although it provides an interface to manipulate and retrieve its state, and it can send notifications of state changes to observers.

You are the user - you interact with the view. When you do something to the view, the view tells the controller what you did. It is the controller's job to handle that. The controller asks the model to change its state. If you click a button, it is the controller's job to figure out what that means and how the model should be manipulated based on that action. The controller may also ask the view to change. The model notifies the view when its state has changed. The view asks the model for state.

MVC is made of:

- Strategy - the view and controller implement the classic Strategy Pattern: the view is configured with a strategy, and the controller provides the strategy.
- Composite - the display consists of a nested set of windows, panels, buttons, text labels and so on. Each display component is a composite (like a window) or a leaf (like a button). When the controller tells the view to update, it only has to tell the top view component, and Composite takes care of the rest.
- Observer - the model implements the Observer Pattern to keep interested objects updated when state changes occur.

Typically, you need one controller per view at runtime; however, the same controller class can easily manage many views.

MVC has been adapted to many web frameworks:

- thin client - the model, most of the view and the controller all reside on the server, with the browser providing a way to display the view, and to get input from the browser to the controller.
- single page application - almost all of the model, view and controller resides on the client side.

MVC frameworks: Django, AngularJS, EmberJS, ...

Bullet points:

- The Model-View-Controller Pattern is a compound pattern consisting of the Observer, Strategy and Composite Patterns.
- The model makes use of the Observer Pattern so that it can keep observers updated yet stay decoupled from them.
- The controller is the Strategy for the view. The view can use different implementations of the controller to get different behavior.
- The view uses the Composite Pattern to implement the user interface, which usually consists of nested components like panels, frames and buttons.
- These patterns work together to decouple the three players in the MVC model, which keeps designs clear and flexible.
- The Adapter Pattern can be used to adapt a new model to an existing view and controller.
- MVC has been adapted to the web.
- There are many web MVC frameworks with various adaptations of the MVC pattern to fit the client/server application structure.

## Chapter 13: Patterns in the Real World

A Pattern:

> is a solution to a problem in a context.

- The context is the situation in which the pattern applies. This should be a recurring situation.
- The problem refers to the goal you are trying to achieve in this context, but it also refers to any constraints that occur in the context.
- The solution is what you are after: a general design that anyone can apply that resolves the goal and set of constraints.

Like design principles, patterns are not meant to be laws or rules - they are guidelines that you can alter to fit your needs. A lot of real-world examples don't fit the classic pattern designs.
When you adapt patterns, it never hurts to document how your pattern differs from the classic design - that way other developers can quickly recognize the patterns you are using.

The Design Pattern definition tells us that the problem consists of a goal and a set of constraints. Only when the solution balances both sides of the _force_ (goal vs constraints) do we have a useful pattern.

A design pattern should have: a name, a template, an intent, a motivation, applicability, a code example, use cases, how the pattern relates to other patterns, and consequences.

Design patterns are discovered, not created. Anyone can discover a Design Pattern, however it is not easy and doesn't happen quickly. You don't have a pattern until others have used it and found it to work. You don't have a pattern until it passes the Rule of Three - a pattern can be called a pattern only if it has been applied in a real-world solution at least 3 times.

Creational Patterns - involve object instantiation and all provide a way to decouple a client from the objects it needs to instantiate: Singleton, Abstract Factory, Factory Method.

Behavioral Patterns - concerned with how classes and objects interact and distribute responsibility: Template Method, Iterator, Command, State, Observer, Strategy.

Structural Patterns - let you compose classes or objects into larger structures: Proxy, Facade, Composite, Adapter, Decorator.

Patterns are often classified by a second attribute - whether the pattern deals with classes or objects: Class Patterns (Template Method, Factory Method, Adapter) and Object Patterns (Composite, Decorator, State, Singleton, ...). Categorisation is confusing because many patterns fit into more than one category. Categories give us a way to think about the way groups of patterns relate and how patterns within a group relate to one another. They also give us a way to extrapolate to new patterns.

Keep it simple - KISS - your goal should be simplicity, not "how can I apply a pattern to this problem". Don't feel like you aren't a sophisticated developer if you don't use a pattern to solve a problem.

Patterns aren't a magic bullet. You can't plug one in, compile and then take an early lunch. To use patterns, you need to think through the consequences for the rest of your design.

Refactoring is a great time to reexamine your design to see if it might be better structured with patterns. Don't be afraid to remove a Design Pattern from your design - remove it when a simpler solution without the pattern would be better.

_YAGNI_: Resist the temptation of creating architectures that are ready to take on change from any direction. If the reason for adding a pattern is only hypothetical, don't add the pattern: it will only add complexity to your system, and you might never need it. Overuse of design patterns can lead to code that is downright overengineered. Always go with the simplest solution that does the job and introduce patterns where the need emerges.

The Beginner uses patterns everywhere. The Intermediate starts to see where patterns are needed and where they aren't. The Zen mind is able to see patterns where they fit naturally.

Anti-pattern:

> Tells you how to go from a problem to a BAD solution.

An anti-pattern tells you why a bad solution is attractive, why that solution in the long term is bad, and suggests other applicable patterns that may provide good solutions. An anti-pattern always looks like a good solution, but then turns out to be a bad solution when it is applied.
By documenting anti-patterns we help others to recognize bad solutions before they implement them. Like patterns, there are many types of anti-patterns, including development, OO, organizational, and domain-specific anti-patterns.

Bullet points:

- Let Design Patterns emerge in your designs, don't force them in just for the sake of using a pattern.
- Design Patterns aren't set in stone - adapt and tweak them to meet your needs.
- Always use the simplest solution that meets your needs, even if it doesn't include a pattern.
- Study Design Pattern catalogs to familiarize yourself with patterns and the relationships among them.
- Pattern classifications provide groupings for patterns. When they help, use them.
- You need to be committed to be a patterns writer - it takes time and patience, and you have to be willing to do lots of refinement.
- Remember, most patterns you encounter will be adaptations of existing patterns, not new patterns.
- Build your team's shared vocabulary. This is one of the most powerful benefits of using patterns.
- Like any community, the patterns community has its own lingo. Don't let that hold you back. Having read this book, you know most of it.

## Chapter 14: Leftover Patterns

**Bridge**

> Use the Bridge Pattern to vary not only your implementations, but also your abstractions.

Benefits:

+ Decouples an implementation so that it is not bound permanently to an interface.
+ Abstraction and implementation can be extended independently.
+ Changes to the concrete abstraction classes don't affect the client.

Bridge Uses and Drawbacks:

- Useful in graphics and windowing systems that need to run over multiple platforms.
- Useful any time you need to vary an interface and an implementation in different ways.
- Increases complexity.

**Builder**

> Use the Builder Pattern to encapsulate the construction of a product and allow it to be constructed in steps.

Benefits:

+ Encapsulates the way a complex object is constructed.
+ Allows objects to be constructed in a multistep and varying process (as opposed to one-step factories).
+ Hides the internal representation of the product from the client.
+ Product implementations can be swapped in and out because the client only sees an abstract interface.

Builder Uses and Drawbacks:

- Often used for building composite structures.
- Constructing objects requires more domain knowledge of the client than when using a Factory.

**Chain of Responsibility**

> Use the Chain of Responsibility Pattern when you want to give more than one object a chance to handle a request.

Benefits:

+ Decouples the sender of the request and its receivers.
+ Simplifies your object because it doesn't have to know the chain's structure and keep direct references to its members.
+ Allows you to add or remove responsibilities dynamically by changing the members or order of the chain.

Chain of Responsibility Uses and Drawbacks:

- Commonly used in windowing systems to handle events like mouse clicks and keyboard events.
- Execution of the request isn't guaranteed - it may fall off the end of the chain if no object handles it.
- Can be hard to observe and debug at runtime.

**Flyweight**

> Use the Flyweight Pattern when one instance of a class can be used to provide many virtual instances.

Benefits:

+ Reduces the number of object instances at runtime, saving memory.
+ Centralizes state for many "virtual" objects into a single location.

Flyweight Uses and Drawbacks:

- The Flyweight is used when a class has many instances, and they all can be controlled identically.
- A drawback of the Flyweight Pattern is that once you have implemented it, single, logical instances of the class will not be able to behave independently from the other instances.
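A minimal Python sketch of the Flyweight idea (the `TreeType` example is illustrative, not from the book): intrinsic state is shared through a factory, while extrinsic state (the coordinates) stays with the client.

```python
class TreeType:  # intrinsic state shared by many "virtual" trees
    def __init__(self, name, texture):
        self.name, self.texture = name, texture


class TreeFactory:
    _types = {}  # one instance per distinct (name, texture) pair

    @classmethod
    def get(cls, name, texture):
        key = (name, texture)
        if key not in cls._types:
            cls._types[key] = TreeType(name, texture)
        return cls._types[key]


# thousands of trees, but only one TreeType instance is allocated
forest = [(x, y, TreeFactory.get("oak", "oak.png"))
          for x in range(100) for y in range(100)]
assert forest[0][2] is forest[-1][2]  # every tree shares the same flyweight
```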
**Interpreter**

> Use the Interpreter Pattern to build an interpreter for a language.

When you need to implement a simple language, the Interpreter Pattern defines a class-based representation for its grammar along with an interpreter to interpret its sentences. To represent the language, you use a class to represent each rule in the language.

Benefits:

+ Representing each grammar rule in a class makes the language easy to implement.
+ Because the grammar is represented by classes, you can easily change or extend the language.
+ By adding methods to the class structure, you can add new behaviors beyond interpretation, like pretty printing and more sophisticated program validation.

Interpreter Uses and Drawbacks:

- Use Interpreter when you need to implement a simple language.
- Appropriate when you have a simple grammar and simplicity is more important than efficiency.
- Used for scripting and programming languages.
- This pattern can become cumbersome when the number of grammar rules is large. In these cases a parser/compiler generator may be more appropriate.

**Mediator**

> Use the Mediator Pattern to centralize complex communications and control between related objects.

Benefits:

+ Increases the reusability of the objects supported by the Mediator by decoupling them from the system.
+ Simplifies maintenance of the system by centralizing control logic.
+ Simplifies and reduces the variety of messages sent between objects in the system.

Mediator Uses and Drawbacks:

- The Mediator is commonly used to coordinate related GUI components.
- A drawback of the Mediator Pattern is that without proper design, the Mediator object itself can become overly complex.

**Memento**

> Use the Memento Pattern when you need to be able to return an object to one of its previous states: for instance, if
> your user requests an "undo".

The Memento has 2 goals:

- Saving the important state of a system's key object.
- Maintaining the key object's encapsulation.

Keeping the Single Responsibility Principle in mind, it is also a good idea to keep the state that you are saving separate from the key object. This separate object that holds the state is known as the Memento object.

Benefits:

+ Keeping the saved state external from the key object helps to maintain cohesion.
+ Keeps the key object's data encapsulated.
+ Provides easy-to-implement recovery capability.

Memento Uses and Drawbacks:

- The Memento is used to save state.
- A drawback to using Memento is that saving and restoring state can be time-consuming.
- In Java systems, consider using Serialization to save a system's state.

**Prototype**

> Use the Prototype Pattern when creating an instance of a given class is either expensive or complicated.

The Prototype Pattern allows you to make new instances by copying existing instances.

Benefits:

+ Hides the complexities of making new instances from the client.
+ Provides the option for the client to generate objects whose type is not known.
+ In some circumstances, copying an object can be more efficient than creating a new object.

Prototype Uses and Drawbacks:

- Prototype should be considered when a system must create new objects of many types in a complex class hierarchy.
- A drawback to using Prototype is that making a copy of an object can sometimes be complicated.
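A quick Python sketch of Prototype (names are illustrative): new instances are produced by copying a pre-built one instead of re-running an expensive constructor.

```python
import copy


class Monster:
    def __init__(self, kind, abilities):
        # imagine this constructor is expensive (e.g. loads assets)
        self.kind, self.abilities = kind, abilities

    def clone(self):
        return copy.deepcopy(self)  # deep copy so clones mutate independently


prototype = Monster("dragon", ["fly", "breathe fire"])
clone = prototype.clone()
clone.abilities.append("turn invisible")
assert prototype.abilities == ["fly", "breathe fire"]  # prototype unchanged
```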
**Visitor**

> Use the Visitor Pattern when you want to add capabilities to a composite of objects and encapsulation is not
> important.

The Visitor works hand in hand with a Traverser. The Traverser knows how to navigate to all the objects in a Composite. The Traverser guides the Visitor through the Composite so that the Visitor can collect state as it goes. Once state has been gathered, the Client can have the Visitor perform various operations on the state.

Benefits:

+ Allows you to add operations to a Composite.
+ Adding new operations is relatively easy.
+ The code for operations performed by the Visitor is centralized.

Visitor Drawbacks:

- The Composite classes' encapsulation is broken when the Visitor is used.
- Because traversal is involved, changes to the Composite structure are more difficult.

================================================
FILE: books/kubernetes-book.md
================================================

[go back](https://github.com/pkardas/learning)

# The Kubernetes Book

Book by Nigel Poulton, https://github.com/nigelpoulton/TheK8sBook

- [1: Kubernetes primer](#1-kubernetes-primer)
- [2: Kubernetes principles of operation](#2-kubernetes-principles-of-operation)
- [3: Getting Kubernetes](#3-getting-kubernetes)
- [4: Working with Pods](#4-working-with-pods)
- [5: Virtual clusters with Namespaces](#5-virtual-clusters-with-namespaces)
- [6: Kubernetes Deployments](#6-kubernetes-deployments)
- [7: Kubernetes Services](#7-kubernetes-services)
- [8: Ingress](#8-ingress)
- [9: Service discovery deep dive](#9-service-discovery-deep-dive)
- [10: Kubernetes storage](#10-kubernetes-storage)
- [11: ConfigMaps and Secrets](#11-configmaps-and-secrets)
- [12: StatefulSets](#12-statefulsets)
- [13: API security and RBAC](#13-api-security-and-rbac)
- [14: The Kubernetes API](#14-the-kubernetes-api)
- [15: Threat modeling Kubernetes](#15-threat-modeling-kubernetes)

## 1: Kubernetes primer

Kubernetes - an application orchestrator, it orchestrates containerized cloud-native microservices apps.

- orchestrator - a system that deploys and manages applications (dynamically responds to changes - scale up/down, self-heal, perform zero-downtime rolling updates)
- containerized app - an app that runs in a container
- 1980-1990 physical servers era, 2000-2010 virtual machines and virtualization era, now cloud-native era
- cloud-native app - designed to meet cloud-like demands of auto-scaling, self-healing, rolling updates, rollbacks and more; cloud-native is about the way applications behave and react to events
- microservices app - built from lots of small, specialised, independent parts that work together to form a meaningful application

Kubernetes enables 2 things Google and the rest of the industry need:

1. It abstracts underlying infrastructure such as AWS
2. It makes it easy to move applications on and off clouds

Kubernetes vs Docker Swarm - long story short, Kubernetes won. Docker Swarm is still under active development and is popular with small companies that need a simple alternative to Kubernetes.

Kubernetes as the operating system of the cloud:

- you install a traditional OS on a server, and it abstracts server resources and schedules application processes
- you install Kubernetes on a cloud, and it abstracts cloud resources and schedules application microservices

At a high level, a cloud/datacenter is a pool of compute, network and storage resources. Kubernetes abstracts them. Servers are no longer pets, they are cattle.
Kubernetes is like a courier service - you package the app as a container, give it a Kubernetes manifest, and let Kubernetes take care of deploying it and keeping it running.

## 2: Kubernetes principles of operation

Kubernetes is 2 things:

- a cluster to run applications on - like any cluster, a bunch of machines to host apps - these machines are called "nodes" (physical servers, VMs, cloud instances, Raspberry Pis, ...); the cluster is made of:
  - control plane (the brains) - exposes the API, has a scheduler for assigning work, records the state of the cluster and apps
  - worker nodes (the muscle) - where user apps run
- an orchestrator of cloud-native microservices apps - a system that takes care of deploying and managing apps

Simple process to run apps on a Kubernetes cluster:

1. Design and write the application as small independent microservices
2. Package each microservice as its own container
3. Wrap each container in a Kubernetes Pod
4. Deploy Pods to the cluster via higher-level controllers such as Deployments, DaemonSets, StatefulSets, CronJobs, ...

The Control Plane - runs a collection of system services that make up the control plane of the cluster (also called Masters, Heads, or Head nodes). Production environments should have multiple control plane nodes - 3 or 5 are recommended - spread across availability zones.

Different services making up the control plane:

- The API server - the Grand Central station of Kubernetes, all communication, between all components, must go through the API server. All roads lead to the API server.
- The Cluster Store - the only stateful part of the Control Plane, stores the configuration and the state. Based on `etcd` (a popular distributed database).
- The Controller Manager and Controllers - all the background controllers that monitor cluster components and respond to events.
- The Scheduler - watches the API server for new work tasks and assigns them to appropriate healthy worker nodes. Only responsible for picking the nodes to run tasks, it isn't responsible for running them.
- The Cloud Controller Manager - its job is to facilitate integrations with cloud services, such as instances, load-balancers, and storage.

Worker nodes - are where user applications run. At a high level they do 3 things:

1. Watch the API server for new work assignments
2. Execute work assignments
3. Report back to the control plane (via the API server)

3 major components of a worker node:

1. Kubelet - the main Kubernetes agent, runs on every worker node. Watches the API server for new work tasks, executes them, and maintains a reporting channel back to the control plane.
2. Container runtime - the kubelet needs it to perform container-related tasks - things like pulling images and starting and stopping containers.
3. Kube-proxy - runs on every node and is responsible for local cluster networking.

In order to run on a Kubernetes cluster an application needs to:

1. Be packaged as a container
2. Be wrapped in a Pod
3. Be deployed via a declarative manifest file
The declarative model:

- declare the desired state of an application microservice in a manifest file - desired state = image, how many replicas, which network ports, how to perform updates
- post it to the API server - using the `kubectl` CLI (it uses an HTTP request)
- Kubernetes stores it in the cluster store as the application's desired state
- Kubernetes implements the desired state on the cluster
- a controller makes sure the observed state of the application doesn't vary from the desired state - background reconciliation loops constantly monitor the state of the cluster; if desired state != observed state, Kubernetes performs the necessary tasks

Kubernetes Pod - a wrapper that allows a container to run on a Kubernetes cluster. The atomic unit of scheduling. VMware has virtual machines, Docker has containers, Kubernetes has Pods. In Kubernetes, every container must run inside a Pod. "Pod" comes from "a pod of whales" (a group of whales is called a pod).

"Pod" and "container" are often used interchangeably, however it is possible (in some advanced use-cases) to run multiple containers in a single Pod.

Pods don't run applications - applications always run in containers, the Pod is just a sandbox to run one or more containers. Pods are also the minimum unit of scheduling in Kubernetes. If you need to scale an app, you add or remove Pods. You do not scale by adding more containers to existing Pods. A Pod is only ready for service when all its containers are up and running. A single Pod can only be scheduled to a single node.

Pods are immutable. Whenever we talk about updating Pods, we mean: delete the Pod and replace it with a new one. Pods are unreliable.

Example controller: Deployments - a high-level Kubernetes object that wraps around a Pod and adds features such as self-healing, scaling, zero-downtime rollouts, and versioned rollbacks.

Services - provide reliable networking for a set of Pods. Services have a stable DNS name, IP address and port, and they load-balance traffic across a dynamic set of Pods. As Pods come and go, the Service observes this, automatically updates itself, and continues to provide that stable networking endpoint.

Service - a stable network abstraction that provides TCP/UDP load-balancing across a dynamic set of Pods.

## 3: Getting Kubernetes

Hosted Kubernetes: AWS Elastic Kubernetes Service, Google Kubernetes Engine, Azure Kubernetes Service. Managing your own Kubernetes cluster isn't a good use of time and other resources. However, it is easy to rack up large bills if you forget to turn off infrastructure when not in use. The hardest way to get a Kubernetes cluster is to build it yourself.

Play with Kubernetes - a quick and simple way to get your hands on a development Kubernetes cluster. However, it is time limited and sometimes suffers from capacity and performance issues. Link: https://labs.play-with-k8s.com

Docker Desktop - offers a single-node Kubernetes cluster that you can develop and test with.

`kubectl` is the main Kubernetes command-line tool. At a high level, `kubectl` converts user-friendly commands into HTTP REST requests with the JSON content required by the Kubernetes API server.

```shell
kubectl get nodes
```

```shell
kubectl config current-context
```

```shell
kubectl config use-context docker-desktop
```

## 4: Working with Pods

Controllers - infuse Pods with super-powers such as self-healing, scaling, rollouts and rollbacks. Every Controller has a PodTemplate defining the Pods it deploys and manages.
You rarely interact with Pods directly. Pod - the atomic unit of scheduling in Kubernetes. Apps deployed to Kubernetes always run inside Pods. If you deploy an app, you deploy it in a Pod. If you terminate an app, you terminate its Pod. If you scale your app up/down, you add/remove Pods. Kubernetes doesn't allow containers to run directly on a cluster, they always have to be wrapped in a Pod.

1. Pods augment containers:

- labels - group Pods and associate them with others
- annotations - add experimental features and integrations with 3rd-party tools
- probes - test the health and status of Pods and the apps they run; this enables advanced scheduling, updates, and more
- affinity and anti-affinity rules - control over where in the cluster Pods are allowed to run
- termination controls - gracefully terminate Pods and the apps they run
- security policies - enforce security features
- resource requests and limits - min and max values for CPU, memory, IO, ...

Despite bringing so many features, Pods are super-lightweight and add very little overhead.

```shell
kubectl explain pods --recursive
```

```shell
kubectl explain pod.spec.restartPolicy
```

2. Pods assist in scheduling: every container in a Pod is guaranteed to be scheduled to the same worker node.

3. Pods enable resource sharing: Pods provide a shared execution environment for one or more containers (filesystem, network stack, memory, volumes). So if a Pod has 2 containers, both containers share the Pod's IP address and can access any of the Pod's volumes to share data.

There are 2 ways to deploy a Pod:

- directly via a Pod manifest - called "static Pods", no super-powers like self-healing, scaling, or rolling updates
- indirectly via a controller - gets all the benefits of being monitored by a highly-available controller running on the control plane

Pets vs Cattle paradigm - Pods are cattle; when they die, they get replaced by another. The old one is gone, and a shiny new one (with the same config, but a different IP and UID) magically appears and takes its place. This is why applications should always store state and data outside the Pod. It is also why you should not rely on individual Pods - they are ephemeral, here today, gone tomorrow.

Deploying Pods:

1. Define the Pod in a YAML manifest file
2. Post it to the API server
3. The API server authenticates and authorizes the request
4. The configuration (YAML) is validated
5. The scheduler deploys the Pod to a healthy worker node with enough available resources

If you are using Docker or containerd as your container runtime, a Pod is actually a special type of container - a pause container. This means containers running inside of Pods are really containers running inside containers.

The Pod network is flat, meaning every Pod can talk directly to every other Pod without the need for complex routing and port mappings. You should use Kubernetes Network Policies to restrict this.

Pod deployment is an atomic operation - all-or-nothing - deployment either succeeds or fails. You will never have a scenario where a partially deployed Pod is servicing requests.

Pod lifecycle: pending -> running (long-lived Pod) | succeeded (short-lived Pod)

- short-lived - batch jobs, designed to only run until a task completes
- long-lived - web servers, remain in the running phase indefinitely; if containers fail, the controller may attempt to restart them

Pods are immutable objects. You can't modify them after they are deployed. You always replace a Pod with a new one (in case of a failure or update).
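A minimal Pod manifest for step 1 above - a sketch that matches the `hello-pod`/`hello-ctr` names used in the commands that follow (the image is illustrative):

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: hello-pod
  labels:
    app: hello
spec:
  containers:
    - name: hello-ctr
      image: nigelpoulton/k8sbook:1.0   # illustrative image
      ports:
        - containerPort: 8080
```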
If you need to scale an app, you add or remove Pods (horizontal scaling). You never scale an app by adding more of the same containers to a Pod. Multi-container Pods are only for co-scheduling and co-locating containers that need tight coupling. Co-locating multiple containers in the same Pod allows containers to be designed with a single responsibility but co-operate closely with others.

Kubernetes multi-container Pod patterns:

- Sidecar pattern - (most popular) the job of a sidecar is to augment or perform a secondary task for the main application container
- Adapter pattern - a variation of the sidecar pattern where the helper container takes non-standardized output from the main container and rejigs it into a format required by an external system
- Ambassador pattern - a variation of the sidecar pattern where the helper container brokers connectivity to an external system; ambassador containers interface with external systems on behalf of the main app container
- Init pattern - runs a special init container that is guaranteed to start and complete before your main app container; it is also guaranteed to run only once

```shell
kubectl get pods
```

Get Pods with additional info:

```shell
kubectl get pods -o wide
```

Get Pod info, a full copy of the Pod from the cluster:

```shell
kubectl get pods -o yaml
```

Get even more info (spec - desired state, status - observed state):

```shell
kubectl get pods hello-pod -o yaml
```

Pod manifest files:

- kind - tells Kubernetes the type of object being defined
- apiVersion - defines the schema version to use when creating the object
- metadata - names, labels, annotations, and a Namespace
- spec - defines the containers the Pod will run

```shell
kubectl apply -f pod.yml
```

You can add the `--watch` flag to `kubectl get pods` to monitor the Pod and see when its status changes to _Running_.

`kubectl describe` - a nicely formatted multi-line overview of an object. You can see the ordering and names of containers using this command:

```shell
kubectl describe pods hello-pod
```

`kubectl logs` - like other Pod-related commands, if you don't specify `--container`, it executes against the first container in the Pod:

```shell
kubectl logs hello-pod
```

```shell
kubectl logs hello-pod --container hello-ctr
```

`kubectl exec` - execute commands inside a running Pod:

```shell
kubectl exec hello-pod -- pwd
```

Get shell access:

```shell
kubectl exec -it hello-pod -- sh
```

The `-it` flag makes the session interactive and connects STDIN and STDOUT on your terminal to STDIN and STDOUT inside the first container in the Pod.

Pod hostname - every container in a Pod inherits its hostname from the name of the Pod (`metadata.name`). With this in mind, you should always set Pod names as valid DNS names (a-z, 0-9, hyphens and dots).

The `spec.initContainers` block defines one or more containers that Kubernetes guarantees will run and complete before the main app container starts.

```shell
kubectl delete pod git-sync
```

## 5: Virtual clusters with Namespaces

Namespaces are a native way to divide a single Kubernetes cluster into multiple virtual clusters. Namespaces partition a Kubernetes cluster and are designed as an easy way to apply quotas and policies to groups of objects.

See all Kubernetes API resources supported in your cluster:

```shell
kubectl api-resources
```

Namespaces are a good way of sharing a single cluster among different departments and environments. For example, a single cluster might have the following Namespaces: dev, test, qa. Each one can have its own set of users and permissions, as well as unique resource quotas, as sketched below.
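A sketch of what such a per-environment setup might look like (names and limits are illustrative, not from the book): a Namespace plus a ResourceQuota scoped to it.

```yaml
apiVersion: v1
kind: Namespace
metadata:
  name: dev
---
apiVersion: v1
kind: ResourceQuota
metadata:
  name: dev-quota
  namespace: dev
spec:
  hard:
    pods: "10"        # at most 10 Pods in the dev Namespace
    requests.cpu: "4" # total CPU requested by all Pods in it
```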
Namespaces are not good for isolating hostile workloads. A compromised container or Pod in one Namespace can wreak havoc in other Namespaces. For example, you shouldn't place competitors such as Pepsi and Coke in separate Namespaces on the same shared cluster. If you need strong workload isolation, the current method is to use multiple clusters. There are some attempts to do something different, but the safest and most common way of isolating workloads is putting them on their own clusters.

Every Kubernetes cluster has a set of pre-created Namespaces (virtual clusters):

```shell
kubectl get namespaces
```

- `default` is where newly created objects go if you don't specify a Namespace
- `kube-system` is where DNS, the metrics server, and other control plane components run
- `kube-public` is for objects that need to be readable by anyone
- `kube-node-lease` is used for node heartbeats and managing node leases

```shell
kubectl describe namespaces default
```

List Service objects in a selected Namespace:

```shell
kubectl get svc --namespace kube-system
```

```shell
kubectl get svc --all-namespaces
```

Create a new Namespace. Pods don't create a Namespace automatically, a Namespace must be created first:

```shell
kubectl create ns kydra
```

Switch between Namespaces:

```shell
kubens shield
```

There are 2 ways to deploy objects to a specific Namespace:

- imperatively - requires you to add the `-n` or `--namespace` flag to commands
- declaratively - requires you to specify the Namespace in the YAML

Delete Pods:

```shell
kubectl delete -f shield.app.yml
```

Delete a Namespace:

```shell
kubectl delete ns shield
```

## 6: Kubernetes Deployments

Use Deployments to bring cloud-native features such as self-healing, scaling, rolling updates, and versioned rollbacks to stateless apps on Kubernetes.

Kubernetes offers several controllers that augment Pods with important capabilities. The Deployment controller is designed for stateless apps. The Deployment spec is a declarative YAML object where you describe the desired state of a stateless app. The controller element operates as a background loop on the control plane, reconciling observed state with desired state.

You start with a stateless application, package it as a container, then define it in a Pod template. At this point you have a static Pod - it doesn't self-heal, autoscale, or support easy updates. That is why you almost always wrap Pods in a Deployment object. A Deployment object only manages a single Pod template.

Deployments rely heavily on ReplicaSets. ReplicaSets manage Pods and bring self-healing and scaling. Deployments manage ReplicaSets and add rollouts and rollbacks. It is not recommended to manage ReplicaSets directly. Think of Deployments as managing ReplicaSets, and ReplicaSets as managing Pods.

Deployments:

- if Pods managed by a Deployment fail, they will be replaced (self-healing)
- if Pods managed by a Deployment see increased or decreased load, they can be scaled

3 concepts fundamental to everything about Kubernetes:

- desired state (what you want)
- observed state (what you have)
- reconciliation (if desired state != observed state, a process of reconciliation attempts to bring observed state into sync with desired state)

The declarative model is a method of telling Kubernetes your desired state, while avoiding the detail of how to implement it. You leave the _how_ up to Kubernetes.
Zero-downtime rolling updates of stateless apps are what Deployments are about. They require a couple of things from your microservice applications in order to work properly:

- loose coupling via APIs
- backwards and forwards compatibility

Each Deployment describes all the following:

- how many Pod replicas
- what images to use for the Pod's containers
- what network ports to expose
- details about how to perform rolling updates

Deploying a new version: update the same Deployment YAML file with the new image version and re-post it to the API server. Rollback: you wind one of the old ReplicaSets up while you wind the current one down. Kubernetes gives you fine-grained control over how rollouts and rollbacks proceed - insert delays, control the pace and cadence of releases, probe the health and status of updated replicas.

YAML components:

- `apiVersion: apps/v1` - Deployments live in the `apps/v1` subgroup
- `kind: Deployment` - a Deployment object
- `metadata.name: hello-deploy` - a valid DNS name
- `spec` - anything nested below `spec` relates to the Deployment
- `spec.template` - the Pod template the Deployment uses to stamp out Pod replicas
- `spec.replicas` - how many Pod replicas the Deployment should create and manage
- `spec.selector` - a list of labels that Pods must have in order for the Deployment to manage them. This tells Kubernetes which Pods to terminate and replace when performing a rollout.
- `spec.revisionHistoryLimit` - how many older versions/ReplicaSets to keep
- `spec.progressDeadlineSeconds` - tells Kubernetes how long to wait during a rollout for each new replica to come online
- `spec.strategy` - tells the Deployment controller how to upgrade the Pods when a rollout occurs - with the Rolling Update strategy:
  - never have more than one Pod below desired state (`maxUnavailable: 1`) - with 10 replicas, you will never have fewer than 9 during the update process
  - never have more than one Pod above desired state (`maxSurge: 1`) - you will never have more than 11 replicas during the update process
  - net result - update two Pods at a time; the delta between 9 and 11 is 2

```yaml
spec:
  replicas: 10
  selector:
    matchLabels:
      app: hello-world
  revisionHistoryLimit: 5
  progressDeadlineSeconds: 300
  minReadySeconds: 10
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
      maxSurge: 1
  template:
    metadata:
      labels:
        app: hello-world
    spec:
      containers:
        - name: hello-pod
          image: nigelpoulton/k8sbook:2.0
          ports:
            - containerPort: 8080
```

Deploy to the cluster:

```shell
kubectl apply -f deploy.yml
```

```shell
kubectl get deploy hello-deploy
```

```shell
kubectl describe deploy hello-deploy
```

```shell
kubectl get replicaset
```

```shell
kubectl describe replicaset hello-deploy-5cd5dcf7d7
```

In order to access a web app from a stable name or IP address, or even from outside the cluster, you need a Kubernetes Service object. A Service provides reliable networking for a set of Pods.
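For example, a minimal ClusterIP Service fronting the Deployment above might look like this (a sketch; the Service name is illustrative) - the selector matches the Pod template's `app: hello-world` label:

```yaml
apiVersion: v1
kind: Service
metadata:
  name: hello-svc
spec:
  type: ClusterIP      # default - a stable internal IP and DNS name
  selector:
    app: hello-world   # matches the Deployment's Pod labels
  ports:
    - port: 8080       # port the Service listens on
      targetPort: 8080 # port the Pods listen on
```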
Scaling the number of replicas manually - edit the YAML and set a different number of replicas, or use the command:

```shell
kubectl scale deploy hello-deploy --replicas 5
```

Performing a rolling update (by replacement, because Pods are immutable):

```shell
kubectl apply -f deploy.yml
```

```shell
kubectl rollout status deployment hello-deploy
```

Pausing & resuming a deployment:

```shell
kubectl rollout pause deploy hello-deploy
```

```shell
kubectl rollout resume deploy hello-deploy
```

Detailed deployment info:

```shell
kubectl describe deploy hello-deploy
```

Kubernetes maintains a documented revision history of rollouts:

```shell
kubectl rollout history deployment hello-deploy
```

Rolling updates create new ReplicaSets, and old ReplicaSets aren't deleted. The fact the old ones still exist makes them ideal for executing rollbacks:

```shell
kubectl rollout undo deployment hello-deploy --to-revision=1
```

Modern versions of Kubernetes use the system-generated pod-template-hash label so only Pods that were originally created by the Deployment/ReplicaSet will be managed:

```shell
kubectl get pods --show-labels
```

## 7: Kubernetes Services

Controllers add self-healing, scaling and rollouts. Despite all of this, Pods are still unreliable, and you should never connect directly to them. Services provide stable and reliable networking for a set of unreliable Pods.

Every Service gets its own stable IP address, its own DNS name, and its own stable port. The Service fronts the Pods with a stable IP, DNS name, and port. It also load-balances traffic to Pods with the right labels. With a Service in place, the Pods can scale up/down, they can fail, and they can be updated and rolled back. Despite all of this, clients will continue to access them without interruption. The Service observes the changes and updates its list of healthy Pods it sends traffic to. Think of Services as having a static front-end and a dynamic back-end.

Services are loosely coupled with Pods via labels and selectors. This is the same technology that loosely couples Deployments to Pods.

Every time you create a Service, Kubernetes automatically creates an associated Endpoints object. The Endpoints object is used to store a dynamic list of healthy Pods matching the Service's label selector. Any new Pods that match the selector get added to the Endpoints object.

Types of Services:

- accessible from inside the cluster:
  - ClusterIP - the default type, a stable virtual IP; every Service you create gets a ClusterIP
- accessible from outside the cluster:
  - NodePort - built on top of ClusterIP, allows external clients to hit a dedicated port on every cluster node and reach the Service
  - LoadBalancer - makes external access even easier by integrating with an internet-facing load-balancer on your underlying cloud platform

Example Service object:

```yml
spec:
  type: NodePort
  ports:
    - port: 8080        # listen internally on port 8080
      nodePort: 30001   # listen externally on 30001
      targetPort: 8080  # forward traffic to the application Pods on port 8080
      protocol: TCP     # use TCP (default)
  selector:             # send traffic to all healthy Pods with the following metadata.labels
    chapter: services
```

Get the Endpoints (EndpointSlice) objects:

```shell
kubectl get endpointslices
```

Get details of each healthy Pod:

```shell
kubectl describe endpointslice svc-test-xgnsv
```

If your cluster is on a cloud platform, deploying a Service with `type=LoadBalancer` will provision one of your cloud's internet-facing load-balancers and configure it to send traffic to your Service.
```shell
kubectl get svc --watch
```

After ~2 minutes a value in the EXTERNAL-IP column will appear.

Delete multiple resources:

```shell
kubectl delete -f deploy.yml -f lb.yml -f svc.yml
```

## 8: Ingress

Ingress is all about accessing multiple web applications through a single LoadBalancer Service.

- `Load Balancer` refers to a Kubernetes Service object of `type=LoadBalancer`
- `load-balancer` refers to the internet-facing load-balancer on the underlying cloud

Ingress exposes multiple Services through a single cloud load-balancer. Cloud load-balancers are expensive.

```shell
kubectl get ing
```

Ingress classes allow you to run multiple Ingress controllers on a single cluster:

- assign each Ingress controller to an Ingress class
- when you create Ingress objects, you assign them to an Ingress class

```shell
kubectl get ingressclass
```

Ingress is a way to expose multiple applications and Kubernetes Services via a single cloud load-balancer. Ingresses are stable objects in the API but have feature overlap with a lot of service meshes - if you are running a service mesh, you may not need Ingress.

## 9: Service discovery deep dive

Finding stuff on a crazy-busy platform like Kubernetes is hard. Service discovery makes it simple. Apps need a way to find the other apps they work with.

2 components to service discovery:

- registration - the process of an application listing its connection details in a service registry so other apps can find it and consume it. Kubernetes uses its internal DNS as a service registry. All Kubernetes Services are automatically registered with DNS.
- discovery - for service discovery to work, apps only need to know the name of the Service fronting the apps they want to connect to (the rest is taken care of by Kubernetes)

Get the Pods running the cluster DNS:

```shell
kubectl get pods -n kube-system -l k8s-app=kube-dns
```

Service discovery works like typical name resolution - check your own table, and if the name isn't found, pass the query on to the next resolver.

Domain name format: _object-name_._namespace_.svc.cluster.local - the object name has to be unique within a Namespace, but not across Namespaces.

## 10: Kubernetes storage

Kubernetes supports lots of types of storage from lots of different places. No matter what type of storage, or where it comes from, when it is exposed on Kubernetes it is called a volume. All that's required is a plugin allowing the storage resources to be surfaced as volumes in Kubernetes.

Container Storage Interface (CSI) - an open standard aimed at providing a clean storage interface for container orchestrators such as Kubernetes.

Core storage-related API objects:

- Persistent Volumes (PV) - how external storage assets are represented in Kubernetes
- Persistent Volume Claims (PVC) - like tickets that grant access to a PV
- Storage Classes (SC) - make it all dynamic

Storage providers - AWS Elastic Block Store, Azure File, NFS volumes, ...

The CSI is a vital piece of Kubernetes storage, however, unless you are a developer writing storage plugins, you are unlikely to interact with it very often.

Working with Storage Classes (a sketch follows at the end of this chapter):

- create one or more StorageClasses on Kubernetes
- deploy Pods with PVCs that reference those Storage Classes

Other settings:

- Access mode:
  - ReadWriteOnce - a PV that can only be bound as R/W by a single PVC
  - ReadWriteMany - a PV that can be bound as R/W by multiple PVCs
  - ReadOnlyMany - a PV that can be bound as R/O by multiple PVCs
- Reclaim policy - how to deal with a PV when its PVC is released:
  - Delete - deletes the PV and the associated storage resource on the external storage system
  - Retain - keeps the associated PV object on the cluster, as well as any data stored on the associated external asset

```shell
kubectl get sc
```

```shell
kubectl get pv
```

```shell
kubectl get pvc
```
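To make the two-step workflow concrete, a sketch (class name, provisioner and sizes are illustrative): a StorageClass, and a PVC that references it so a Pod can claim dynamically provisioned storage.

```yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: fast
provisioner: ebs.csi.aws.com   # CSI plugin of the underlying cloud
reclaimPolicy: Delete
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: fast-pvc
spec:
  accessModes:
    - ReadWriteOnce            # R/W by a single PVC
  storageClassName: fast       # triggers dynamic provisioning of a PV
  resources:
    requests:
      storage: 10Gi
```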
## 11: ConfigMaps and Secrets

Most apps comprise two main parts: the app & the configuration. Coupling the application and the configuration into a single easy-to-deploy unit is an anti-pattern. De-coupling the application and the configuration has the following benefits:

- re-usable application images (you can use the same image on dev, staging, prod)
- simpler development and testing (easier to spot a mistake when the app and the config are decoupled, e.g. an app crash after a config change)
- simpler and less disruptive changes

Kubernetes provides an object called a ConfigMap that lets you store configuration data outside a Pod. It also makes it easy to inject config into Pods at run-time. You should not use ConfigMaps to store sensitive data such as certificates and passwords. Kubernetes provides a different object, called a Secret, for storing sensitive data.

Behind the scenes, ConfigMaps are a map of key-value pairs, and we call each pair an entry:

- Keys - an arbitrary name that can be created from alphanumerics, dashes, dots, and underscores
- Values - anything, including multiple lines with carriage returns
- Keys and Values are separated by a colon - `key:value`

Data in a ConfigMap can be injected into containers at run-time via any of the following methods:

- environment variables (static - updates made to the map don't get reflected in running containers, a major reason not to use environment variables)
- arguments to the container's startup command (the most limited method, shares the environment variables' limitations)
- files in a volume (the most flexible method)

ConfigMap objects don't have the concept of state (desired/actual) - this is why they have a `data` block instead of `spec` and `status` blocks.

Creating a ConfigMap declaratively:

```yaml
kind: ConfigMap
apiVersion: v1
metadata:
  name: multimap
data:
  given: Nigel
  family: Poulton
```

```shell
kubectl apply -f multimap.yml
```

ConfigMaps are extremely flexible and can be used to insert complex configurations, including JSON files and even scripts, into containers at run-time.

View logs from a specific container in a Pod:

```shell
kubectl logs startup-pod -c args1
```

Using ConfigMaps with volumes is the most flexible option. You can reference entire configuration files, as well as make updates to the ConfigMap that will be reflected in running containers.

1. Create the ConfigMap
2. Create a ConfigMap volume in the Pod template
3. Mount the ConfigMap volume into the container
4. Entries in the ConfigMap will appear in the container as individual files

Update a ConfigMap by re-applying its YAML.
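A sketch of steps 2-3 for the `multimap` ConfigMap defined above (the Pod and volume names are illustrative, chosen to match the `cmvol` command that follows): each entry surfaces as a file, here `/etc/name/given` and `/etc/name/family`.

```yaml
apiVersion: v1
kind: Pod
metadata:
  name: cmvol
spec:
  volumes:
    - name: volmap
      configMap:
        name: multimap           # the ConfigMap created above
  containers:
    - name: ctr
      image: nginx               # illustrative image
      volumeMounts:
        - name: volmap
          mountPath: /etc/name   # each entry becomes a file in this directory
```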
Check the value of an entry mounted as a file:

```shell
kubectl exec cmvol -- cat /etc/name/given
```

Secrets are almost identical to ConfigMaps - they hold application configuration data that is injected into containers at run-time. Secrets are designed for sensitive data such as passwords, certificates, and OAuth tokens.

Despite being designed for sensitive data, Kubernetes does not encrypt Secrets in the cluster store. Fortunately, it is possible to configure encryption-at-rest with EncryptionConfiguration objects. Despite this, many people opt to use external 3rd-party tools, such as HashiCorp Vault.

A typical workflow for a Secret is as follows:

1. The Secret is created and persisted to the cluster store as an un-encrypted object
2. A Pod that uses it gets scheduled to a cluster node
3. The Secret is transferred over the network, un-encrypted, to the node
4. The kubelet on the node starts the Pod and its containers
5. The Secret is mounted into the container via an in-memory tmpfs filesystem and decoded from base64 to plain text
6. The application consumes it
7. When the Pod is deleted, the Secret is deleted from the node

```shell
kubectl get secrets
```

Create a Secret manually:

```shell
kubectl create secret generic creds --from-literal user=piotr --from-literal pwd=qwerty
```

Decode base64:

```shell
echo cGlvdHI= | base64 -d
```

```yaml
apiVersion: v1
kind: Secret
metadata:
  name: tkb-secret
  labels:
    chapter: configmaps
type: Opaque
data:                             # use stringData instead when providing plaintext values
  username: bmlnZWxwb3VsdG9u
  password: UGFzc3dvcmQxMjM=
```

The most flexible way to inject a Secret into a Pod is via a special type of volume called a Secret volume. Secret volumes are automatically mounted as read-only to prevent containers and applications accidentally mutating them.

## 12: StatefulSets

Stateful application - an application that creates and saves valuable data, for example an app that saves data about client sessions and uses it for future sessions, or a database.

StatefulSets guarantee:

- predictable and persistent Pod names - name format: `StatefulSetName-Integer`
- predictable and persistent DNS hostnames
- predictable and persistent volume bindings

Failed Pods managed by a StatefulSet will be replaced by new Pods with the exact same Pod name, the exact same DNS hostname, and the exact same volumes. This is true even if the replacement is started on a different cluster node. The same is not true of Pods managed by a Deployment.

StatefulSets create one Pod at a time, and always wait for previous Pods to be running and ready before creating the next. Knowing the order in which Pods will be scaled down, as well as knowing that Pods will not be terminated in parallel, is a game-changer for many stateful apps. Note: deleting a StatefulSet object does not terminate Pods in order; with this in mind, you may want to scale down a StatefulSet to 0 replicas before deleting it.

A Headless Service is a regular Kubernetes Service object without an IP address. It becomes a StatefulSet's Governing Service when you list it in the StatefulSet config under `spec.serviceName`.

StatefulSets are only a framework. Applications need to be written in ways to take advantage of the way StatefulSets behave.
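A skeleton StatefulSet manifest as a sketch (names and image are illustrative): note `spec.serviceName` pointing at the governing headless Service, and the Pods being named `tkb-sts-0`, `tkb-sts-1`, `tkb-sts-2`.

```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: tkb-sts
spec:
  replicas: 3
  serviceName: tkb-headless   # the governing (headless) Service
  selector:
    matchLabels:
      app: web
  template:
    metadata:
      labels:
        app: web
    spec:
      containers:
        - name: ctr-web
          image: nginx:latest # illustrative image
```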
## 13: API security and RBAC

Kubernetes is API-centric, and the API is served through the API server.

Authentication (authN, "auth en") is about proving your identity. All requests to the API server have to include credentials, and the authentication layer is responsible for verifying them. The authentication layer in Kubernetes is pluggable, and popular modules include integration with external identity management systems such as AWS Identity and Access Management. In fact, Kubernetes forces you to use an external identity management system.

Cluster details and credentials are stored in a `kubeconfig` file.

```shell
kubectl config view
```

Authorization (authZ, "auth zee") - RBAC (Role-Based Access Control) - happens immediately after successful authentication. It is about three things: users, actions, resources. Which users can perform which actions against which resources.

Admission Control runs immediately after successful authentication and authorization and is all about policies. There are 2 types of admission controllers: mutating (check for compliance and can modify requests) and validating (check for policy compliance, without request modification). Most real-world clusters will have a lot of admission controllers enabled. Example: for a policy requiring the `env=prod` label, an admission controller can verify the label's presence and add it when it is missing.

## 14: The Kubernetes API

Kubernetes is API-centric. This means everything in Kubernetes is about the API, and everything goes through the API and the API server. For the most part, you will use `kubectl` to send requests, however you can also craft them in code.

```shell
kubectl proxy --port 9000 &
```

```shell
curl http://localhost:9000/api/v1/pods
```

The Kubernetes API is divided into 2 groups:

- the core group - mature objects that were created in the early days of Kubernetes before the API was divided into groups, located in `api/v1`
- the named groups - the future of the API, all new resources get added to named groups

```shell
kubectl api-resources
```

Kubernetes has a strict process for adding new resources to the API. They come in as _alpha_ (experimental, can be buggy), progress through _beta_ (pre-release), and eventually reach _stable_. It is possible to write your own custom controllers and resources.

## 15: Threat modeling Kubernetes

Threat modeling is the process of identifying vulnerabilities. The STRIDE model:

- Spoofing - pretending to be somebody else with the aim of gaining extra privileges on a system
- Tampering - the act of changing something in a malicious way, so you can cause one of the following:
  - denial of service - tampering with a resource to make it unusable
  - elevation of privilege - tampering with a resource to gain additional privileges
- Repudiation - creating doubt about something; non-repudiation is proving certain actions were carried out by certain individuals
- Information disclosure - when sensitive data is leaked
- Denial of service - making something unavailable; there are many types of DoS attacks, but a well-known variation is overloading a system to the point it can no longer service requests
- Elevation of privilege - gaining higher access than what is granted, usually in order to cause damage or gain unauthorized access

================================================
FILE: books/kubernetes-in-action.md
================================================

[go back](https://github.com/pkardas/learning)

# Kubernetes in Action, Second Edition

Book by Marko Lukša

================================================
FILE: books/nlp-book.md
================================================

[go back](https://github.com/pkardas/learning)

# Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition

Book by Daniel Jurafsky and James H. Martin (December 2020 draft)
- [Chapter 2: Regular Expressions, Text Normalization, Edit Distance](#chapter-2-regular-expressions-text-normalization-edit-distance)
- [Chapter 3: N-gram Language Models](#chapter-3-n-gram-language-models)
- [Chapter 4: Naive Bayes and Sentiment Classification](#chapter-4-naive-bayes-and-sentiment-classification)
- [Chapter 5: Logistic Regression](#chapter-5-logistic-regression)
- [Chapter 6: Vector Semantics and Embeddings](#chapter-6-vector-semantics-and-embeddings)
- [Chapter 7: Neural Networks and Neural Language Models](#chapter-7-neural-networks-and-neural-language-models)
- [Chapter 8: Sequence Labeling for Parts of Speech and Named Entities](#chapter-8-sequence-labeling-for-parts-of-speech-and-named-entities)
- [Chapter 9: Deep Learning Architectures for Sequence Processing](#chapter-9-deep-learning-architectures-for-sequence-processing)
- [Chapter 10](#chapter-10)
- [Chapter 11: Machine Translation and Encode-Decoder Models](#chapter-11-machine-translation-and-encode-decoder-models)
- [Chapter 12: Constituency Grammars](#chapter-12-constituency-grammars)
- [Chapter 13-16](#chapter-13-16)
- [Chapter 17: Information Extraction](#chapter-17-information-extraction)
- [Chapter 18: Word Senses and WordNet](#chapter-18-word-senses-and-wordnet)
- [Chapter 19](#chapter-19)
- [Chapter 20: Lexicons for Sentiment, Affect and Connotation](#chapter-20-lexicons-for-sentiment-affect-and-connotation)
- [Chapter 21-22](#chapter-21-22)
- [Chapter 23: Question Answering](#chapter-23-question-answering)
- [Chapter 24: Chatbots & Dialogue Systems](#chapter-24-chatbots--dialogue-systems)
- [Chapter 25: Phonetics](#chapter-25-phonetics)
- [Chapter 26: Automatic Speech Recognition and Text-to-speech](#chapter-26-automatic-speech-recognition-and-text-to-speech)

## Chapter 2: Regular Expressions, Text Normalization, Edit Distance

*Regular Expressions*

A regular expression is an algebraic notation for characterising a set of strings.

Kleene * ("cleany star") - zero or more occurrences. Kleene + - at least one occurrence.

Anchors - special characters that *anchor* regular expressions to particular places in a string (`^` - start, `$` - end of a string). `^` has multiple meanings:

1. match start of the line
2. negation inside square brackets: `[^Ss]` - neither `S` nor `s`

The pipe symbol `|`, also known as "disjunction", is a logical OR: `cat|dog` matches either `cat` or `dog`.

Operator precedence hierarchy:

1. Parenthesis: `()`
2. Counters: `* + ? {}`, `{}` - explicit counter
3. Sequences and anchors: `sequence ^the end$`
4. Disjunction: `|`

Regular expressions are greedy, however there is a way to enforce non-greedy behaviour -> `*?` - a Kleene star that matches as little text as possible, `+?` - a Kleene plus that matches as little text as possible.

Fixing RE errors might require the following efforts:

- increasing precision (minimising false positives - incorrectly matched)
- increasing recall (minimising false negatives - incorrectly missed)

*Substitution* - easiest to explain with an example:

```
the (.*)er they were, the \1er they will be
--- will match ---
the bigger they were, the bigger they will be
```

The number operator, e.g. `\1`, allows repeating a matched group. So the parenthesis operator not only groups, but also stores the match in a numbered register. It is possible to disable the register and use a non-capturing group, e.g.: `(?:some|a few) (people|cats) like some \1`.

Famous chatbot ELIZA used a series of regular expression substitutions:

```
I'M (depressed|sad) -> I AM SORRY TO HEAR YOU ARE \1
```
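The capture-group and substitution machinery above maps directly onto Python's `re` module; a small sketch (the sentences are the book's own examples):

```python
import re

# backreference \1 must repeat whatever the first group matched
pattern = r"the (.*)er they were, the \1er they will be"
assert re.search(pattern, "the bigger they were, the bigger they will be")
assert not re.search(pattern, "the bigger they were, the faster they will be")

# an ELIZA-style substitution rule
reply = re.sub(r"I'M (depressed|sad)", r"I AM SORRY TO HEAR YOU ARE \1", "I'M depressed")
assert reply == "I AM SORRY TO HEAR YOU ARE depressed"
```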
Lookahead - look ahead in the text to see if some pattern matches, WITHOUT advancing the match cursor. Negative lookahead - used for ruling out special cases, e.g. ruling out strings starting with the word Volcano: `(?!Volcano)[A-Za-z]+`

*Words*

Fragment - a broken-off word: "I do main- mainly", "main-" is a fragment here.

Filler - "um, uh" - used in spoken language; problem: should it be treated as a word?

Fragments and fillers are 2 kinds of disfluencies.

Type - the number of distinct words in a corpus. When we speak about the number of words in a language, we are generally referring to word types.

Herdan's Law/Heaps' Law - the relationship between the number of types (`|V|`) and the number of tokens (`N`) in a corpus:

$$ |V| = kN^{\beta} $$

(where $k$ and $0 < \beta < 1$ are constants)

*Corpora*

Writers and speakers have specific styles of communicating and use specific dialects; text can vary by time, place, function, race, gender, age, socioeconomic class.

Code switching - a common practice for speakers and writers to use multiple languages in a single communicative act.

When preparing computational models for language processing, it is useful to prepare a datasheet - a document answering questions like: Who produced the text? In what context? For what purpose? In what language? What were the race, gender, ... of the authors? How was the data annotated?

*Text Normalisation*

1. Tokenisation (segmentation)
2. Normalising word formats
3. Segmenting sentences

*Tokenisation*

UNIX's `tr` command can be used for quick tokenisation of English texts.

Problem: keep specific words together: `2020/02/02`, `km/h`, `$65`, `www.github.com`, `100 000`, `I'm`, `New York`. A tokeniser can also be used to expand clitic contractions: `we're -> we are`. Tokenisation is tied up with Named Entity Recognition.

Tokenisation needs to be fast, hence it often uses deterministic algorithms based on regular expressions compiled into efficient finite state automata. Tokenisation is more complex for e.g. Chinese or Japanese (languages not using spaces to separate words) - for Japanese, word segmentation algorithms work better. It is also possible to use neural networks for the task of tokenisation.

Penn Treebank Tokeniser:

- separates clitics (`doesn't -> does n't`)
- keeps hyphenated words together (`close-up`, `Bielsko-Biała`)
- separates out all punctuation

Byte-Pair Encoding (a toy sketch of the merge loop follows below):

- begins with a vocabulary that is the set of all individual characters
- examines the training corpus, chooses the two symbols that are most frequently adjacent (A, B -> AB)
- continues to count and merge, creating longer and longer character strings, until k merges (a parameter of the algorithm) have been done
- most words will be represented as full symbols, a few rare ones will have to be represented by their parts
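A toy Python sketch of the BPE merge loop described above (the corpus and `k` are illustrative; a real implementation also tracks word frequencies and an end-of-word symbol):

```python
from collections import Counter


def most_frequent_pair(corpus):
    pairs = Counter()
    for word in corpus:  # each word is a list of current symbols
        pairs.update(zip(word, word[1:]))
    return pairs.most_common(1)[0][0]


def merge(corpus, pair):
    merged = []
    for word in corpus:
        out, i = [], 0
        while i < len(word):
            if i + 1 < len(word) and (word[i], word[i + 1]) == pair:
                out.append(word[i] + word[i + 1])  # A, B -> AB
                i += 2
            else:
                out.append(word[i])
                i += 1
        merged.append(out)
    return merged


corpus = [list(w) for w in ["lower", "lowest", "newer", "wider"]]
for _ in range(3):  # k = 3 merges
    corpus = merge(corpus, most_frequent_pair(corpus))
print(corpus)  # multi-character symbols emerge as new vocabulary entries
```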
*Normalisation*

The task of putting words in a standard format, choosing a single normal form for words with multiple forms like the USA/US. A valuable process even though spelling information is lost.

Case folding - mapping everything to lower/upper case. However, it might give wrong results, e.g. US (country) -> us (people, we).

*Lemmatisation*

The task of determining that two words have the same root (am, are, is -> be). Useful for web search - usually we want all forms to be found. Requires morphological parsing of the word. Morphology is the study of the way words are built up from smaller meaning-bearing units called morphemes. Morphemes have 2 classes: stems - the central part of the word - and affixes (prefixes and suffixes).

*The Porter Stemmer*

Lemmatisation is hard, that's why sometimes we use stemming:

```
This -> Thi, was -> wa, Bone's -> Bone s, ...
```

Stemming is based on a series of rules, e.g. `ATIONAL -> ATE` (relational -> relate). Stemmers do make errors, but they are fast and deterministic.

*Sentence Segmentation*

`?` and `!` are unambiguous, but `.` is unfortunately ambiguous - it doesn't have to mean the end of a sentence. Use a rule-based approach or machine learning.

*Minimum Edit Distance*

The minimum number of editing operations (insertion, deletion, substitution) needed to transform one string into another. How to find the minimum edit distance? It can be thought of as a shortest path problem: the shortest sequence of edits from one string to another. It can be solved using dynamic programming (a table-driven method for solving problems by combining solutions to sub-problems).

## Chapter 3: N-gram Language Models

Assigning probabilities to upcoming words in a sentence is a very important task in speech recognition, spelling correction, machine translation and AAC systems. Systems that assign probabilities to sequences of words are called **language models**. The simplest model is the n-gram.

*P(w|h)* - the probability of a word *w* given some history *h*:

$$ P(the|its\ water\ is\ so\ transparent\ that) = \dfrac{count(its\ water\ is\ so\ transparent\ that\ the)}{count(its\ water\ is\ so\ transparent\ that)} $$

You can compute these probabilities over a large corpus, e.g. Wikipedia. This method works fine in many cases, but it turns out even the web cannot give us good estimates in most cases - language is dynamic, and you are not able to count ALL the possible sentences. Hence, there is a need for a more clever way of estimating the probability *P(w|h)*. Instead of computing the probability of a word given its entire history, we can approximate the history by just the last few words.

The bigram model approximates the probability by taking only the last word, so for the example we had earlier (in general: an n-gram looks *n - 1* words into the past; trigrams are most commonly used, 4/5-grams are used when there is sufficient training data):

$$ P(the|its\ water\ is\ so\ transparent\ that) \approx P(the|that) $$

This assumption, that the next word depends only on the previous one, is called a **Markov** assumption.

The probability of a sentence can be calculated using the chain rule of probability:

$$ P(i\ want\ english\ food) = P(i|\langle s\rangle)\,P(want|i)\,P(english|want)\,P(food|english)\,P(\langle/s\rangle|food) $$

Such a technique is able to capture e.g. cultural preferences - people look for Chinese food more often than English food.

Language models are always computed in log format - log probabilities. Why? Probabilities always fall between 0 and 1, and when multiplying many small floats you end up with numerical underflow; using logarithms you get numbers that are not as small, and a product of probabilities becomes a sum of log probabilities.
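A tiny sketch of a bigram model with log probabilities (the toy corpus is illustrative and contains no zero counts, so no smoothing is needed yet):

```python
import math
from collections import Counter

corpus = "<s> i want english food </s> <s> i want chinese food </s>".split()
unigrams = Counter(corpus)
bigrams = Counter(zip(corpus, corpus[1:]))


def log_prob(prev, word):
    # MLE estimate: P(word | prev) = count(prev word) / count(prev)
    return math.log(bigrams[(prev, word)] / unigrams[prev])


sentence = "<s> i want english food </s>".split()
# summing log probabilities instead of multiplying raw ones avoids underflow
log_p = sum(log_prob(prev, word) for prev, word in zip(sentence, sentence[1:]))
print(log_p, math.exp(log_p))  # P(<s> i want english food </s>) = 0.5
```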
If a particular test set is used too often, we implicitly tune to its characteristics.

*Perplexity*

PP for short, a metric used for evaluating language models. Perplexity on a test set is the inverse probability of the test set, normalised by the number of words. Minimising perplexity is equivalent to maximising the test set probability according to the language model.

Another way of thinking about perplexity: the weighted average branching factor (branching factor - the number of possible next words that can follow any word). The more information the n-gram gives us about the word sequence, the lower the perplexity (unigram: 962, bigram: 170, trigram: 109).

An intrinsic improvement in perplexity does not guarantee an extrinsic improvement in performance. In other words: just because some metric shows your model is great, it does not mean it will do great in real life. Perplexity should be confirmed by an end-to-end evaluation on a real task.

*Generalisation and zeros*

An n-gram model is highly dependent on the training corpus, and it does a better job as we increase *n*. You need to use similar genres for training - Shakespearian English is far different from WSJ's English. To build a model for translating legal documents you need to train it on legal documents; to build a question answering system, you need to use questions for training. It is important to use appropriate dialects and varieties (African American Language, Nigerian English, ...).

Zeros: imagine you trained a model on a corpus containing "denied the: allegations, speculation, rumours, report", but in the test you check phrases like "denied the: offer, loan" - the model would estimate the probability as 0:

$$ P(offer|denied\ the) = 0 $$

This is bad... if you want to calculate perplexity, you would need to divide by zero. Which is kinda problematic.

So what about words we haven't seen before (open vocabulary -> out-of-vocabulary words / unknown words)? Add a pseudo word `<UNK>`. You can use this tag to replace all the words that occur fewer than some small number *n* times.

*Smoothing* (discounting) - the process of shaving off a bit of probability mass from some more frequent events and giving it to events we have never seen. There is a variety of ways to do smoothing:

- Laplace Smoothing (add-one smoothing) - adds 1 to all bigram counts before we normalise them into probabilities. So all the counts that used to be 0 become 1, 1 becomes 2, ... This method is not used in state-of-the-art solutions; it can be treated as a baseline (see the sketch below).
- Add-k smoothing - instead of adding 1, we add a fractional count, e.g. 0.5, 0.05, 0.01, ... Useful for some applications but still does not perform perfectly.

Backoff - we can use available knowledge: if you need to compute a trigram, maybe a bigram can help you with that, or even a unigram. Sometimes this might be sufficient.

Interpolation - mix the probability estimates from all the n-gram estimators.

*Kneser-Ney Smoothing* - the most commonly used method. It uses the following observation: "words that have appeared in more contexts in the past are more likely to appear in some new context as well". The best performing method is a modified Kneser-Ney Smoothing.
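A minimal sketch of the add-one (Laplace) estimate described above (the counts are made up; for add-k, use `k` instead of 1 in the numerator and `k * vocab_size` in the denominator):

```python
from collections import Counter

def laplace_bigram_prob(w_prev: str, w: str,
                        bigrams: Counter, unigrams: Counter, vocab_size: int) -> float:
    # Add-one smoothing: every bigram count is incremented by 1, and the
    # denominator grows by |V| so the distribution still sums to 1.
    return (bigrams[(w_prev, w)] + 1) / (unigrams[w_prev] + vocab_size)

unigrams = Counter({"denied": 4, "the": 4})
bigrams = Counter({("denied", "the"): 4, ("the", "allegations"): 2, ("the", "report"): 2})
vocab = {"denied", "the", "allegations", "report", "offer"}

# "the offer" was never seen, but it no longer gets probability zero:
print(laplace_bigram_prob("the", "offer", bigrams, unigrams, len(vocab)))  # 1/9 ~ 0.111
```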
*Huge Language Models and Stupid Backoff*

Google open-sourced their Web 1 Trillion 5-gram corpus, and they also released Google Books Ngrams. There is also COCA.

Stupid backoff - an algorithm for a language model that gives up the idea of trying to make the model a true probability distribution - no discounting. If a higher-order n-gram has a zero count, we simply back off to a lower-order n-gram. This algorithm does not produce a probability distribution.

## Chapter 4: Naive Bayes and Sentiment Classification

Many problems can be viewed as classification problems: text categorisation, sentiment analysis, language identification, authorship attribution, period disambiguation, tokenisation, and many more. The goal is to take a sample, extract features and classify the observation.

*Naive Bayes Classifiers*

Classifiers that make a simplified (naive) assumption about how the features interact.

Binary Multinomial Naive Bayes (binary NB) - used for sentiment analysis, clips the word counts in each document at 1 (extract the unique words from the document and count occurrences).

How to deal with negations? "I really like this movie" (positive), "I don't like this movie" (negative). A very simple, commonly used baseline: during text normalisation prepend the prefix *NOT_* to every word after a token of logical negation:

```
i didn't like this movie , but ... -> i didn't NOT_like NOT_this NOT_movie , but ...
```

Chapter 16 will tell more about parsing and the relationships between negations.

Sentiment lexicons - lists of words that are pre-annotated with positive or negative sentiment. Popular lexicons: General Inquirer or LIWC. For Naive Bayes you can add a feature "this word occurs in the positive lexicon" instead of counting each word separately. Chapter 20 will tell how lexicons can be learned automatically and show other use cases besides sentiment analysis.

Spam detection - Naive Bayes + regex + HTML scan.

Language identification - Naive Bayes, but not on words! It uses character n-grams.

Naive Bayes can be viewed as a language model.

*Evaluation*

Confusion matrix - a table for visualising how an algorithm performs with respect to the human *gold labels* (human-labelled data). It has 2 dimensions - system output and gold labels.

Accuracy - what percentage of all observations our system labelled correctly. It doesn't work well for unbalanced classes - e.g. with 80 negative samples and 20 positive ones, learn to always answer *negative* and you have 80% accuracy.

Precision - the percentage of the items that the system detected that are in fact positive.

Recall - the percentage of the items actually present in the input that were correctly identified by the system.

F-measure - combines both metrics - the weighted harmonic mean of precision and recall - a conservative metric, closer to the minimum of the two values (compared to the arithmetic mean).

*Evaluating with more than two classes*

Macro-averaging - compute the performance for each class and then average over classes. It better reflects the statistics of the smaller classes, so it is more appropriate when performance on all classes is equally important.

Micro-averaging - collect decisions for all classes into a single confusion matrix and then compute precision and recall from that table. It is dominated by the more frequent class, since the counts are pooled.

*Test sets and Cross-validation*

Cross-validation - when your dataset is not large enough, you can use all of it for training and validating: select random training and validation sets, train the classifier, compute the error, and repeat. Usually 10 times (10-fold cross-validation).

*Statistical Significance Testing*

We often need to compare the performance of two systems. How can we know one system is better than the other?

*Effect size* - the difference between F1-scores.
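A minimal sketch of precision, recall and F1 computed from confusion-matrix counts, as defined above (the counts are made up):

```python
def precision_recall_f1(tp: int, fp: int, fn: int) -> tuple[float, float, float]:
    # Precision: of everything the system flagged, how much was right?
    precision = tp / (tp + fp)
    # Recall: of everything that should have been flagged, how much was found?
    recall = tp / (tp + fn)
    # F1: harmonic mean - stays close to the smaller of the two values.
    f1 = 2 * precision * recall / (precision + recall)
    return precision, recall, f1

# Hypothetical counts: 8 true positives, 2 false positives, 4 false negatives.
print(precision_recall_f1(tp=8, fp=2, fn=4))  # (0.8, 0.666..., 0.727...)
```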
*Null hypothesis* - we suppose *delta <= 0* (A is not better than B); we would like to know if we can confidently rule out this hypothesis. In order to do this, create a random variable *X* ranging over all test sets and ask: how likely is it, if the null hypothesis is correct, that among these test sets we would encounter a value of *delta* as large as the one we found? This likelihood is called the *p-value*. We select a threshold - usually small - and if the p-value falls below it, we can reject the *null hypothesis* and say that A is better than B - the result is *statistically significant*.

*Avoiding harms in classification*

Representational harms - a system perpetuating negative stereotypes about social groups.

Toxicity detection - hate speech, abuse, harassment detection. These systems can cause harm themselves, for example by flagging sentences that merely mention minorities. A system based on stereotypes can lead to censorship. Also, human-labelled data can be biased.

It is important to include a *model card* when releasing a system. A model card includes: training algorithms and parameters, data sources, intended users and use, and model performance across different groups.

## Chapter 5: Logistic Regression

Logistic regression - one of the most important analytic tools in the social and natural sciences, and the baseline supervised machine learning algorithm for classification. A neural network can be seen as a series of logistic regression classifiers stacked on top of each other.

It is a discriminative classifier (unlike Naive Bayes, a generative classifier - you can literally ask such a model what, for example, a dog or a cat looks like; a discriminative model learns only how to distinguish the classes, e.g. given a training set with collar-wearing dogs and cats, when you ask the model what it knows about cats it would respond: a cat doesn't wear a collar).

*The Sigmoid*

The sigmoid function takes a real value (even x -> infinity) and maps it to the range [0, 1]. It is nearly linear around 0. This is extremely useful for calculating e.g. *P(y=1|x)* - the probability of belonging to the class.

$$ z = w \cdot x + b $$

$$ P(y=1) = \sigma(z) $$

*z* ranges from *-inf* to *+inf* (*w* - the weight vector, *x* - the feature vector, *b* - the bias).

Logistic regression can be used for all sorts of NLP tasks, e.g. period disambiguation (deciding if a period is the end of a sentence or part of a word).

*Designing features* - features are generally designed by examining the training set with an eye to linguistic intuitions.

*Representation learning* - ways to learn features automatically in an unsupervised way from the input.

*Choosing a classifier* - Logistic Regression is great at finding correlations.

*Loss / cost function* - the distance between the system output and the gold output.

Gradient descent - an optimisation algorithm for updating the weights. It finds a minimum of a function by figuring out in which direction the function's slope is rising most steeply and moving in the opposite direction.

*θ* - the parameters; in the case of logistic regression θ = (weights, bias).

*Convex function* - a function with one minimum, so there are no local minima to get stuck in. Local minima are a problem when training neural networks - non-convex functions.

*Learning rate* - the magnitude of the amount to move in gradient descent (a hyper-parameter).

*Hyper-parameters* - special parameters chosen by the algorithm designer that affect how the algorithm works.

*Batch training* - we compute the gradient over the entire dataset, which is quite expensive. Alternatively use *mini-batch* training - train on a group of *m* examples (512 or 1024), as sketched below.
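A minimal numpy sketch of logistic regression trained with mini-batch gradient descent on the cross-entropy loss, as described above (synthetic data, no regularisation):

```python
import numpy as np

def sigmoid(z: np.ndarray) -> np.ndarray:
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
X = rng.normal(size=(1000, 3))                   # toy features
y = (X @ np.array([1.5, -2.0, 0.5]) > 0) * 1.0   # toy labels

w, b, lr, m = np.zeros(3), 0.0, 0.1, 64          # weights, bias, learning rate, mini-batch size
for epoch in range(20):
    for i in range(0, len(X), m):
        xb, yb = X[i:i + m], y[i:i + m]
        p = sigmoid(xb @ w + b)                  # P(y=1|x) for the mini-batch
        # Gradient of the cross-entropy loss w.r.t. w and b:
        w -= lr * xb.T @ (p - yb) / len(xb)
        b -= lr * np.mean(p - yb)

print(np.mean((sigmoid(X @ w + b) > 0.5) == y))  # training accuracy
```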
*Regularisation* - a good model should generalise well; there is a problem of overfitting, where the model fits the training data too perfectly. It is possible to add a regularisation term - L1 (lasso regression) or L2 (ridge regression) regularisation.

*Multinomial logistic regression* (*softmax* regression) - for classification problems with more than 2 classes. The multinomial logistic classifier uses a generalisation of the sigmoid called the softmax function.

*Model interpretation* - often we want to know more than just the result of the classification, we want to know why the classifier made a certain decision. Logistic regression is interpretable.

## Chapter 6: Vector Semantics and Embeddings

*Distributional hypothesis* - the link between similarity in how words are distributed and similarity in what they mean.

*Lemma / citation form* - the basic form of a word. *Wordform* - an inflected lemma. A lemma can have multiple meanings, e.g. mouse might refer to a rodent or to a pointing device; each of these is called a word sense. Lemmas can be polysemous (have multiple senses), which makes interpretation difficult.

Word sense disambiguation - the task of determining which sense of a word is being used in a particular context.

*Synonyms* - two words are synonymous if they are substitutable - have the same propositional meaning.

*Principle of contrast* - a difference in linguistic form is always associated with some difference in meaning, e.g. water / H2O - H2O is rather used in a scientific context.

*Word similarity* - *cat* is not a synonym of *dog*, but these are 2 similar words. There are many human-labelled datasets for this.

*Word relatedness* (or association) - e.g. *coffee* is not similar to *cup* - they share practically no features - but they are very related: associated, they co-occur. A very common kind of relatedness is the semantic field, e.g. *surgeon, scalpel, nurse, hospital*. Semantic fields are related to topic models like LDA - Latent Dirichlet Allocation - unsupervised learning on large sets of texts to induce sets of associated words. There are more relations between words: hypernymy, antonymy or meronymy.

*Semantic Frames and Roles* - a set of words that denote perspectives or participants in a particular type of event, e.g. *Ling sold the book to Sam* - a seller / buyer relation. An important problem in question answering.

*Connotation* - affective meaning - emotions, sentiment, opinions or evaluations.

*Sentiment* - valence - the pleasantness of the stimulus; arousal - the intensity of emotion provoked by the stimulus; dominance - the degree of control exerted by the stimulus. In 1957 Osgood used these 3 values to represent a word - a revolutionary idea: a word embedded in a 3-dimensional space.

*Vector semantics* - a word's meaning can be defined by its distribution in language - by its neighbouring words. The idea of vector semantics is to represent a word as a point in a multidimensional semantic space (a word embedding) derived from the distributions of the word's neighbours.

*Information retrieval* - the task of finding the document *d* from the *D* documents in some collection that best matches a query *q*.

*Cosine* - a similarity metric between 2 words (the cosine of the angle between their vectors).

*TF-IDF* - raw frequencies are not the best way to measure association between words (a lot of noise from words like *the, it, they, ...*). Term Frequency - the frequency of word *t* in document *d*. The second factor, Inverse Document Frequency, gives higher weights to words that occur only in a few documents.
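A minimal sketch of TF-IDF vectors and cosine similarity, as described above (tiny made-up corpus; the exact TF dampening varies between formulations - here `log10(count + 1)`):

```python
import math

docs = [
    "the surgeon used a scalpel".split(),
    "the nurse helped the surgeon".split(),
    "the cat chased the mouse".split(),
]

def tf_idf(term: str, doc: list[str]) -> float:
    tf = math.log10(doc.count(term) + 1)             # dampened term frequency
    df = sum(term in d for d in docs)                # document frequency
    return tf * math.log10(len(docs) / df) if df else 0.0

vocab = sorted({w for d in docs for w in d})

def vector(doc: list[str]) -> list[float]:
    return [tf_idf(t, doc) for t in vocab]

def cosine(u: list[float], v: list[float]) -> float:
    dot = sum(a * b for a, b in zip(u, v))
    norm = math.sqrt(sum(a * a for a in u)) * math.sqrt(sum(b * b for b in v))
    return dot / norm

print(cosine(vector(docs[0]), vector(docs[1])))  # surgeon docs: similar
print(cosine(vector(docs[0]), vector(docs[2])))  # 0.0 - only "the" is shared, and its idf is 0
```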
*PMI* - Pointwise Mutual Information - a measure of how often 2 events occur together, compared with what we would expect if they were independent. A useful tool whenever we need to find words that are strongly associated. It is more common to use PPMI (positive PMI). Very rare words tend to have very high PMI.

*Word2vec* - dense word embeddings. The intuition of word2vec is that instead of counting how often each word *w* occurs near word *u*, we train a classifier on a binary classification task: "Is word *w* likely to show up near word *u*?". We can use running text as the training data - this is called self-supervision.

Visualising embeddings - visualise the meaning of a word embedded in space by listing the most similar words, by using clustering algorithms, or - the most important method - by dimensionality projection, e.g. t-SNE.

*First-order co-occurrence / syntagmatic association* - words that are near each other, e.g. *wrote* and *book*.

*Second-order co-occurrence / paradigmatic association* - words that have similar neighbours, e.g. *wrote* and *said*.

*Representational harm* - embeddings are capable of capturing bias and stereotypes. Moreover, they are capable of amplifying bias.

## Chapter 7: Neural Networks and Neural Language Models

Neural networks share much of the same mathematics as logistic regression, but NNs are more powerful classifiers than logistic regression. Neural networks can automatically learn useful representations of the input.

*Unit* - takes a set of real-valued numbers as input, performs some computation on them and produces an output: a weighted sum of the inputs plus a bias, passed through a function *f*. The output of this function is called an activation.

$$ y = a = f(z) = f(w \cdot x + b) $$

*f* - e.g. sigmoid, tanh, ReLU. Sigmoid is most commonly used for teaching. Tanh is almost always better than sigmoid. ReLU (rectified linear unit) - the most commonly used and the simplest.

*The (famous) XOR problem* - Minsky and Papert proved it is not possible to build a perceptron (a very simple neural unit that has a binary output and no non-linear activation function) to compute logical XOR. However, it can be computed using a layered neural network (see the sketch at the end of this chapter).

*Feed-Forward Neural Network* - a multi-layer network in which units are connected without cycles. Sometimes called multi-layer perceptrons for historical reasons, although modern networks aren't perceptrons (aren't purely linear). A simple FFNN has 3 kinds of nodes: input units, hidden units and output units. The core of the neural network is the hidden layer formed of hidden units. The standard architecture is fully connected layers - each unit in a layer takes as input the outputs of all the units in the previous layer. The purpose of learning is to learn the weights and bias of each layer.

*Loss function* - the distance between the system output and the gold output, e.g. cross-entropy loss. To find the parameters that minimise the loss, we use for example *gradient descent*. Gradient descent requires knowing the gradient of the loss function with respect to each of the parameters. The solution for computing this gradient is error back-propagation.

Language modeling - predicting upcoming words from prior word context - neural networks are perfect for this task. Much better than *n-gram* models - better generalisation, higher accuracy; on the other hand - much slower to train.
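A minimal numpy sketch of the XOR solution mentioned above - a two-layer network with hand-picked weights (a classic assignment for ReLU hidden units; no learning involved):

```python
import numpy as np

def relu(z: np.ndarray) -> np.ndarray:
    return np.maximum(z, 0)

# Hidden layer: two ReLU units; output: a single linear unit.
W = np.array([[1.0, 1.0], [1.0, 1.0]])  # hidden weights
b = np.array([0.0, -1.0])               # hidden biases
u = np.array([1.0, -2.0])               # output weights

def xor(x1: int, x2: int) -> float:
    h = relu(W @ np.array([x1, x2]) + b)  # hidden activations
    return u @ h

for x1, x2 in [(0, 0), (0, 1), (1, 0), (1, 1)]:
    print(x1, x2, xor(x1, x2))  # prints 0, 1, 1, 0 - XOR, impossible for one perceptron
```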
## Chapter 8: Sequence Labeling for Parts of Speech and Named Entities

*Named entity* - e.g. Marie Curie, New York City, Stanford University, ... Important for many natural language understanding tasks (e.g. sentiment towards a specific product, question answering). Generally speaking, anything that can be referred to with a proper name (person, location, organisation). Possible output tags: PER (person), LOC (location), ORG (organisation) and GPE (geopolitical entity).

*POS / Part of Speech* - knowing whether a word is a noun or a verb tells us about likely neighbouring words. Parts of speech fall into 2 categories: closed class and open class. POS-tagging is the process of assigning a part-of-speech to each word in a text. Tagging is a disambiguation task - words are ambiguous, one word can have more than one POS, e.g. "book a flight" vs "hand me that book". The goal is to resolve these ambiguities. The accuracy of POS-tagging algorithms is very high: 97%+.

Most Frequent Class Baseline - an effective baseline method: assign each token to the class that occurs most often for it in the training set.

Markov chain - a model that tells us about the probabilities of sequences of random variables. A Markov chain makes a very strong assumption - to predict the future, all that matters is the current state. Formally a Markov chain is specified by: a set of *N* states, a transition probability matrix and an initial probability distribution.

The Hidden Markov Model - allows talking about both observed events (the words seen in the input) and hidden events (the part-of-speech tags). Formally an HMM is specified by: a set of *N* states, a transition probability matrix, observations, observation likelihoods / emission probabilities (the probability of an observation being generated from a state *q*) and an initial probability distribution. The HMM is a useful and powerful model, but it needs a number of augmentations to achieve high accuracy.

CRF (Conditional Random Field) - a log-linear model that assigns a probability to an entire output sequence. We can think of a CRF as a giant version of what multinomial logistic regression does for a single token.

Gazetteer - a list of place names, millions of entries for locations with detailed geographical and political information, e.g. https://www.geonames.org/

POS tags are evaluated by accuracy. NER is evaluated using recall, precision and F1. Named Entity Recognition is often based on rule-based approaches.

## Chapter 9: Deep Learning Architectures for Sequence Processing

Language is an inherently temporal phenomenon. This is hard to capture using standard machine learning models.

*Perplexity* - a measure of model quality; the perplexity of a model with respect to an unseen test set is the probability the model assigns to it, normalised by its length.

*RNN - Recurrent Neural Network* - any network that contains a cycle within its network connections, i.e. any network where the value of a unit is directly or indirectly dependent on its own earlier outputs as an input. Within RNNs there are constrained architectures that have proven to be extremely effective.

*Elman Networks / Simple Recurrent Networks* - a very useful architecture that also serves as the basis for more complex approaches like the LSTM (Long Short-Term Memory). An RNN can be illustrated as a feedforward network unrolled in time. A new set of weights, connecting the hidden layer from the previous time step to the current hidden layer, determines how the network makes use of past context when calculating the output for the current input.

RNN-based language models process sequences a word at a time, attempting to predict the next word in a sequence by using the current word and the previous hidden state as inputs.
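A minimal numpy sketch of the Elman recurrence described above (random toy embeddings; a real language model would add an output layer with a softmax over the vocabulary):

```python
import numpy as np

rng = np.random.default_rng(0)
d_in, d_h = 4, 3                      # embedding size, hidden size
W = rng.normal(size=(d_h, d_in))      # input -> hidden weights
U = rng.normal(size=(d_h, d_h))       # previous hidden -> hidden weights (the recurrence)
b = np.zeros(d_h)

def step(x_t: np.ndarray, h_prev: np.ndarray) -> np.ndarray:
    # h_t depends on the current input AND the previous hidden state -
    # this is how past context influences the current output.
    return np.tanh(W @ x_t + U @ h_prev + b)

h = np.zeros(d_h)
for x_t in rng.normal(size=(5, d_in)):  # a toy "sentence" of 5 word embeddings
    h = step(x_t, h)
print(h)
```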
RNNs can be used for many other tasks:

- sequence labeling - assign a label chosen from a small fixed set of labels to each element of a sequence (e.g. POS tagging or named entity recognition). The inputs to the RNN are word embeddings and the outputs are tag probabilities generated by a softmax layer.
- sequence classification - e.g. sentiment analysis, spam detection, message routing for customer support applications.

Stacked RNN - multiple networks where the output of one layer serves as the input to a subsequent layer. They very often outperform single-layer networks, mainly because the stacked layers are able to form different levels of abstraction. The optimal number of layers is application-dependent.

Bidirectional RNN - forward and backward networks combined. In these 2 independent networks the input is processed from the start to the end and from the end to the start. Also very effective for sequence classification.

It is difficult to train RNNs for tasks that require the network to make use of information distant from the current point of processing. RNNs cannot easily carry such information forward, because the information in the hidden states tends to be fairly local.

LSTM - Long Short-Term Memory - divides the context management problem into two sub-problems:

- removing information no longer needed from the context
- adding information likely to be needed for later decision-making

The LSTM is capable of mitigating the loss of distant information. However, LSTMs are still RNNs, so relevant information can still be lost.

Transformers - an approach to sequence processing that eliminates recurrent connections and returns to architectures reminiscent of fully connected networks. Transformers map sequences of input vectors to sequences of output vectors of the same length; they are made up of stacks of layers consisting of simple linear layers, feedforward networks and custom connections. Transformers use *self-attention layers* - these allow the network to directly extract and use information from arbitrarily large contexts without the need to pass it through intermediate recurrent connections as in RNNs. At the core of an attention-based approach is the ability to compare an item of interest to a collection of other items in a way that reveals their relevance in the current context (see the sketch at the end of this chapter).

It turns out language models can generate toxic language. Many models are trained on data from Reddit (a majority of young males - not representative). A language model can also leak information about its training data - meaning it can be attacked.
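A minimal numpy sketch of the self-attention comparison described in this chapter (stripped down: a single head, no learned query/key/value projections, no causal mask):

```python
import numpy as np

def self_attention(X: np.ndarray) -> np.ndarray:
    # Compare every position with every other position (dot products),
    # turn the scores into weights, and mix the inputs accordingly.
    d = X.shape[1]
    scores = X @ X.T / np.sqrt(d)                  # relevance of each item to every other
    weights = np.exp(scores)
    weights /= weights.sum(axis=1, keepdims=True)  # softmax over each row
    return weights @ X                             # each output is a weighted mix of inputs

X = np.random.default_rng(0).normal(size=(5, 8))   # 5 token vectors of dimension 8
print(self_attention(X).shape)                     # (5, 8) - same sequence length in and out
```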
## Chapter 10

Missing chapter.

## Chapter 11: Machine Translation and Encoder-Decoder Models

Machine translation - the use of computers to translate from one language to another. The most common use of machine translation is information access - when you want to, for example, translate some instructions on the web. It is also often used in CAT - Computer-Aided Translation, where the computer produces a draft translation and a human then fixes it in post-editing. Last but not least, it is useful for human communication needs.

The standard algorithm for MT is the encoder-decoder network (it can be implemented with RNNs or with Transformers). They are extremely successful at catching small differences between languages.

Some aspects of human language seem to be universal - true for every or almost every language; for example, every language has words for referring to people, eating or drinking. However, languages also differ in many ways, which causes translation divergences. German, French, English and Mandarin are all SVO (Subject-Verb-Object) languages. Hindi and Japanese are SOV languages. Irish and Arabic are VSO languages. VO languages generally have prepositions, OV languages generally have postpositions.

The Machine Translation and Word Sense Disambiguation problems are closely linked.

Encoder-decoder (sequence-to-sequence) networks are models capable of generating contextually appropriate, arbitrary-length output sequences. The encoder (LSTM, GRU, convolutional network, Transformer) takes an input sequence and creates a contextualised representation of it; this representation is then passed to the decoder (any kind of sequence architecture), which generates a task-specific output sequence.

Machine translation raises many of the same ethical issues that we have discussed previously. MT systems often assign gender according to cultural stereotypes. Some research found that MT systems perform worse when they are asked to translate sentences that describe people with non-stereotypical gender roles.

## Chapter 12: Constituency Grammars

Syntactic constituency is the idea that groups of words can behave as single units. The most widely used formal system for modeling constituent structure in English is the Context-Free Grammar, also called Phrase-Structure Grammar; the formalism is equivalent to Backus-Naur Form (BNF). A context-free grammar consists of a set of rules or productions, each of which expresses the ways that symbols of the language can be grouped and ordered together.

Treebank - a corpus in which every sentence is annotated with a parse tree.

## Chapter 13-16

Skipped for now.

## Chapter 17: Information Extraction

Information extraction - turns the unstructured information embedded in texts into structured data - e.g. a relational database - to enable further processing.

Relation extraction - finding and classifying semantic relations among the text entities. These are often binary relations - child-of, employment, part-whole. The task of NER is extremely useful here. Wikipedia also offers a large supply of relations.

RDF - Resource Description Framework - a triple of entity-relation-entity. DBPedia was derived from Wikipedia and contains over 2 billion RDF triples. Freebase - now part of Wikidata - has relations between people and their nationality or locations.

There are 5 main classes of algorithms for relation extraction:

- handwritten patterns - high precision and can be tailored to specific domains, however low recall and a lot of work
- supervised machine learning - for all entity pairs, determine if they are in a relation
- semi-supervised machine learning (bootstrapping and distant supervision) - bootstrapping proceeds by taking the entities in a seed pair and then finding sentences that contain both entities
- unsupervised

For unsupervised and semi-supervised approaches it is possible to calculate estimated metrics (like estimated precision).

Knowledge graphs - datasets of structured relational knowledge.

Event extraction - the task of identifying mentions of events in texts. In English most events correspond to verbs and most verbs introduce events (United Airlines SAID, prices INCREASED, ...). Some noun phrases can also denote events (the increase, the move, ...). With extracted events and extracted temporal expressions, the events from a text can be put on a timeline. Determining the ordering can be viewed as a binary relation detection and classification task.

Event coreference - needed to figure out which event mentions in a text refer to the same event.

Extracting time - temporal expressions are used to determine when the events in a text happened. Dates in text need to be normalised. Temporal expressions can be:
- relative: yesterday, next semester
- absolute: calendar dates
- durations

The temporal expression task consists of finding the start and the end of all the text spans that correspond to such temporal expressions. This task can use a rule-based approach.

Temporal Normalisation - the process of mapping a temporal expression to either a specific point in time or to a duration.

Template filling - the task of extracting descriptions of stereotypical or recurring events.

## Chapter 18: Word Senses and WordNet

Ambiguity - the same word can be used to mean different things. Words can be polysemous - have many meanings. A word sense is a discrete representation of one aspect of the meaning of a word. Meaning can be expressed as an embedding, for example an embedding that represents the meaning of a word in its textual context. An alternative to embeddings are glosses - written for people; a gloss is just a sentence, and a sentence can be embedded. Another way of defining a sense is through relationships ("right" is the opposite of "left").

Relations between senses:

- synonymy - when two senses of two different words are (almost) identical - couch / sofa, vomit / throw up
- antonymy - when two words have an opposite meaning - long / short, fast / slow
- hyponym / subordinate - when one word is more specific than the other word - car (hyponym) -> vehicle
- hypernym / superordinate - when one word is more general than the other word - vehicle (hypernym) -> car
- meronymy - when one word describes a part of the other word - wheel (meronym) -> car
- holonymy - the opposite of meronymy - car (holonym) -> wheel
- metonymy - the use of one aspect of a concept to refer to other aspects of the entity - Jane Austen wrote Emma (author) <-> I really love Jane Austen (works of the author)

WordNet - a large online thesaurus, a database that represents word senses. WordNet also represents relations between senses (is-a, part-whole). The relation between two senses is important in language understanding, for example antonymy - words with opposite meanings. English WordNet has 3 separate databases (one for nouns, one for verbs, and one for adjectives and adverbs).

Synset (Synonym Set) - the set of near-synonyms for a WordNet sense. Glosses are properties of a synset.

Word Sense Disambiguation - the task of determining which sense of a word is being used in a particular context. WSD algorithms take as input a word in context and output the correct word sense.

Lexical sample tasks - a small pre-selected set of target words and an inventory of senses. The all-words task (a harder problem) - the system is given an entire text and a lexicon with an inventory of senses for each entry, and has to disambiguate every word in the text.

The best WSD algorithm is a simple 1-nearest-neighbour algorithm using contextual word embeddings. There are also feature-based algorithms for WSD - POS tags, n-grams (3-grams most commonly), weighted averages of embeddings - passed to an SVM classifier.

The Lesk algorithm - the oldest and most powerful knowledge-based WSD method and a useful baseline. Lesk is a family of algorithms that choose the sense whose dictionary gloss or definition shares the most words with the target word's neighbourhood (see the sketch below).

BERT - uses contextual embeddings.

Word Sense Induction - an unsupervised approach: we don't use human-defined word senses, instead the set of senses of each word is created automatically from the instances of each word in the training set.
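A sketch of the simplified Lesk overlap idea described above, using NLTK's WordNet interface (assumes `nltk` is installed and the `wordnet` corpus has been downloaded; real implementations weight the words and drop stop words):

```python
from nltk.corpus import wordnet as wn  # requires: nltk.download("wordnet")

def simplified_lesk(word: str, context: str):
    context_words = set(context.lower().split())
    best, best_overlap = None, -1
    for sense in wn.synsets(word):
        # Signature = the words of the sense's gloss plus its example sentences.
        signature = set(sense.definition().lower().split())
        for example in sense.examples():
            signature |= set(example.lower().split())
        overlap = len(signature & context_words)
        if overlap > best_overlap:
            best, best_overlap = sense, overlap
    return best

sense = simplified_lesk("bank", "the boat ran aground on the muddy bank of the river")
print(sense, "-", sense.definition())
```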
## Chapter 19

Skipped for now.

## Chapter 20: Lexicons for Sentiment, Affect and Connotation

Connotation - the aspects of a word's meaning that are related to a writer or reader's emotions, sentiment, opinions or evaluations.

Emotion (by Scherer) - a relatively brief episode of response to the evaluation of an external or internal event as being of major significance.

Detecting emotions has the potential to improve a number of language processing tasks - detecting emotions in reviews, improving conversation systems, depression detection.

Basic emotions proposed by Ekman - surprise, happiness, anger, fear, disgust, sadness.

Basic emotions proposed by Plutchik - 4 opposing pairs: joy-sadness, anger-fear, trust-disgust, anticipation-surprise.

Most models include 2-3 dimensions:

- valence - the pleasantness of the stimulus
- arousal - the intensity of emotion provoked by the stimulus
- dominance - the degree of control exerted by the stimulus

The General Inquirer - the oldest lexicon: 1915 positive words and 2291 negative words. The NRC Valence, Arousal, and Dominance lexicon scores 20 000 words on valence, arousal and dominance. The NRC Word-Emotion Association Lexicon uses Plutchik's basic emotions to describe 14 000 words. There are many more lexicons.

Best-worst scaling - a method used in crowdsourcing: annotators are given N items and are asked which item is the best and which item is the worst.

Detecting a person's personality from their language can be useful for dialog systems. Many theories of human personality are based around a small number of dimensions:

- extroversion vs introversion - sociable, assertive vs aloof, reserved, shy
- emotional stability vs neuroticism - calm, unemotional vs insecure, anxious
- agreeableness vs disagreeableness - friendly, cooperative vs antagonistic, fault-finding
- conscientiousness vs unconscientiousness - self-disciplined, organised vs inefficient, careless
- openness to experience - intellectual, insightful vs shallow, unimaginative

Connotation frames - express richer relations of affective meaning that a predicate encodes about its arguments - "Country A violated the sovereignty of Country B".

## Chapter 21-22

Skipped for now.

## Chapter 23: Question Answering

Two major paradigms of question answering:

- information retrieval-based
- knowledge-based

Factoid questions - questions that can be answered with simple facts expressed in short texts, like: Where is the Louvre Museum located?

Information retrieval. The resulting IR system is often called a search engine. Ad hoc retrieval: a user poses a query to a retrieval system, which then returns an ordered set of documents from some collection. The basic IR system architecture uses the vector space model: queries and documents are mapped to vectors, then cosine similarity is used to rank the candidate documents answering the query. This is an example of the bag-of-words model. However, we don't use raw word counts in IR; instead we use TF-IDF.

TF-IDF - the term frequency tells us how frequent a word is; words that occur more often are likely to be informative about the document's content. However, terms that occur across all documents aren't useful. This is where inverse document frequency comes in handy.

Document scoring - we score document d by the cosine of its vector with the query vector q:

$$ score(q, d) = cos(q, d) = \dfrac{q \cdot d}{|q|\ |d|} $$

A more commonly used version of the score (because queries are usually short):

$$ score(q, d) = \sum_{t \in q} \dfrac{tf\textrm{-}idf(t, d)}{|d|} $$

A slightly more complex version of TF-IDF is called BM25.
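A minimal sketch of the short-query scoring formula above (toy two-document collection; `|d|` taken as the Euclidean norm of the document's tf-idf vector):

```python
import math

docs = {
    "d1": "the louvre museum is located in paris".split(),
    "d2": "the museum of modern art is in new york".split(),
}

def tf_idf(term: str, doc: list[str]) -> float:
    tf = math.log10(doc.count(term) + 1)
    df = sum(term in d for d in docs.values())
    return tf * math.log10(len(docs) / df) if df else 0.0

def score(query: str, doc: list[str]) -> float:
    # Short-query version: sum the tf-idf of each query term, normalised by |d|.
    doc_len = math.sqrt(sum(tf_idf(t, doc) ** 2 for t in set(doc)))
    return sum(tf_idf(t, doc) for t in query.split()) / doc_len

for name, doc in docs.items():
    print(name, score("where is the louvre museum located", doc))  # d1 should win
```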
In the past it was common to remove high-frequency words from both the query and the document. The list of such high-frequency words to be removed is called a stop list (the, a, to, ...). Worth knowing, but not commonly used these days because there are much better mechanisms.

Inverted index - given a query term, gives a list of documents that contain the term.

TF-IDF / BM25 have a conceptual flaw - they work only if there is exact overlap of words between the query and the document - the vocabulary mismatch problem. The solution is to use synonymy - instead of using word counts, use embeddings. Modern methods use encoders like BERT.

The goal of IR-based QA (open-domain QA) is to answer a user's question by finding short text segments from the web or some document collection. Datasets:

- SQuAD - Stanford Question Answering Dataset - contains passages from Wikipedia and associated questions
- HotpotQA - created by showing crowd workers multiple context documents and asking them to come up with questions that require reasoning about all the documents
- TriviaQA - questions written by trivia enthusiasts, question-answer-evidence triples
- The Natural Questions - real anonymised queries to the Google search engine; annotators were presented a query along with a Wikipedia page from the top 5 results
- TyDi QA - questions from typologically diverse languages

Entity linking - the task of associating a mention in text with the representation of some real-world entity in an ontology (e.g. Wikipedia).

Knowledge-based question answering - the idea of answering a question by mapping it to a query over a structured database.

RDF triples - tuples of 3 elements: subject, predicate and object, e.g. (Ada Lovelace, birth-year, 1815). This can be used to perform queries: "When was Ada Lovelace born?" -> birth-year(Ada Lovelace, ?). A second kind of system uses a semantic parser to map the question to a structured program that produces an answer.

Another alternative is to query a pretrained language model, forcing the model to answer a question solely from the information stored in its parameters. T5 is an encoder-decoder architecture; in pretraining it learns to fill in masked spans of text by generating the missing spans in the decoder. It is then fine-tuned on QA datasets, given the question without any additional context or passages.

Watson DeepQA - the system from IBM that won Jeopardy!. Main stages: Question Processing, Candidate Answer Generation, Candidate Answer Scoring, Answer Merging and Confidence Scoring.

MRR - mean reciprocal rank - a common evaluation metric for factoid question answering.

## Chapter 24: Chatbots & Dialogue Systems

Properties of Human Conversation:

- turns - a dialogue is a sequence of turns; turn structure has important implications for spoken dialogue - a system needs to know when to stop talking and also needs to know when the user is done speaking.
- speech acts:
    - constatives - committing the speaker to something's being the case (answering, claiming, denying, confirming, disagreeing)
    - directives - attempts by the speaker to get the addressee to do something (advising, asking, forbidding, inviting, ordering, requesting)
    - commissives - committing the speaker to some future course of action (promising, planning, vowing, betting, opposing)
    - acknowledgments - express the speaker's attitude regarding the hearer with respect to some social action (apologising, greeting, thanking, accepting an acknowledgment)
- grounding - acknowledging that the hearer has understood the speaker (like ACK in TCP); humans do this all the time, for example by saying OK
- sub-dialogues and dialogue structure:
    - questions set up an expectation for an answer, proposals are followed by acceptance / rejection, ...
    - these pairs aren't always followed immediately by their second pair part; they can be separated by a side sequence (or sub-dialogue) - a correction sub-dialogue, a clarification question or a presequence (Can you make train reservations? Yes I can. Please, do ...)
- initiative - sometimes a conversation is completely controlled by one participant; for humans it is more natural that initiative shifts from one person to another
- inference - the speaker provides some information, and other information needs to be derived from it (When in May do you want to travel? I have a meeting from the 12th to the 15th.)

Because of these characteristics of human conversation it is difficult to build dialogue systems that can carry on natural conversations.

Chatbots - the simplest form of dialogue systems. Chatbots fall into 3 categories:

- rule-based chatbots - for example ELIZA, based on psychological research, created in 1966, the most important chatbot. A few years later PARRY was created - this chatbot had a model of its own mental state (fear, anger, ...) - the first known system to pass the Turing test (1972): psychiatrists couldn't distinguish text transcripts of interviews with PARRY from transcripts of interviews with real paranoids (!!!)
- corpus-based chatbots - instead of using hand-built rules, mine human-human conversations. Requires enormous amounts of training data. Most methods use retrieval (grab a response from some corpus) or generation (a language model or encoder-decoder generates the response given the dialogue context)
- a hybrid of the two above

Task-based dialogue - a dialogue system has the goal of helping a user solve some task like making an airplane reservation or buying a product. GUS - an influential architecture from 1977 for travel planning. The control architecture for frame-based dialogue systems (frame - a kind of knowledge structure representing the kinds of intentions the system can extract from user sentences) is used in various modern systems like Siri, Google Assistant or Alexa. The system's goal is to fill the slots in the frame with the fillers the user intends, and then perform the relevant action for the user. To do this the system asks the questions associated with the frame's slots. This is a heavily rule-based approach (see the sketch below).

Slot-filling - performed together with domain and intent classification.

If a dialogue system misrecognizes or misunderstands an utterance, the user will generally correct the error by repeating or reformulating the utterance. Modern systems often ask the user to confirm or reject whether the input was understood correctly. Explicit confirmation eliminates the risk of mistakes, but it is awkward and increases the length of the conversation. The system might also ask clarification questions.
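A toy sketch of the frame-and-slots control loop described above (the frame, questions and regex patterns are all made up; real systems use trained classifiers for slot filling rather than regexes):

```python
import re

# A hypothetical GUS-style travel frame: each slot has a question the system
# can ask and a pattern for extracting a filler from the user's utterances.
FRAME = {
    "origin":      ("What city are you leaving from?", re.compile(r"from (\w+)")),
    "destination": ("Where are you going?",            re.compile(r"to (\w+)")),
    "date":        ("When would you like to travel?",  re.compile(r"on (\w+)")),
}

def fill_frame(utterances: list[str]) -> dict[str, str]:
    slots: dict[str, str] = {}
    for slot, (question, pattern) in FRAME.items():
        for utterance in utterances:
            if match := pattern.search(utterance):
                slots[slot] = match.group(1)
                break
        else:
            print("SYSTEM:", question)  # ask about any slot still missing
    return slots

print(fill_frame(["i would like a flight from Warsaw to Krakow"]))
# -> asks only about the date; origin and destination are already filled
```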
Dialogue systems might be evaluated using different metrics, e.g. engagingness, avoiding repetition, making sense. A commonly used high-level evaluation is acute-eval - an annotator looks at two conversations and chooses the one in which the dialogue system participant performed better.

Automatic metrics are generally not used for chatbots. However, there are some attempts to train a Turing-like evaluator classifier to distinguish human-generated responses from machine-generated responses.

The study of dialogue systems is closely linked with the field of Human-Computer Interaction. Ethical issues also need to be taken into consideration when designing such systems - a famous example is Microsoft's Tay chatbot (adversarial attacks). ML models amplify stereotypes and also raise privacy concerns.

## Chapter 25: Phonetics

## Chapter 26: Automatic Speech Recognition and Text-to-speech



================================================
FILE: books/peopleware.md
================================================

[go back](https://github.com/pkardas/learning)

# Peopleware: Productive Projects and Teams

Book by Tom DeMarco and Tim Lister

- [Chapter 1: Somewhere today, a project is failing](#chapter-1-somewhere-today-a-project-is-failing)
- [Chapter 2: Make a cheeseburger, sell a cheeseburger](#chapter-2-make-a-cheeseburger-sell-a-cheeseburger)
- [Chapter 3: Vienna waits for you](#chapter-3-vienna-waits-for-you)
- [Chapter 4: Quality - if time permits](#chapter-4-quality---if-time-permits)
- [Chapter 5: Parkinson's Law revisited](#chapter-5-parkinsons-law-revisited)
- [Chapter 6: Laetrile](#chapter-6-laetrile)
- [Chapter 7: The Furniture Police](#chapter-7-the-furniture-police)
- [Chapter 8: You never get anything done around here between 9 and 5](#chapter-8-you-never-get-anything-done-around-here-between-9-and-5)
- [Chapter 9: Saving money on space](#chapter-9-saving-money-on-space)
- [Chapter 10: Brain Time versus Body Time](#chapter-10-brain-time-versus-body-time)
- [Chapter 11: The Telephone](#chapter-11-the-telephone)
- [Chapter 12: Bring Back the Door](#chapter-12-bring-back-the-door)
- [Chapter 13: Taking Umbrella Steps](#chapter-13-taking-umbrella-steps)
- [Chapter 14: The Hornblower Factor](#chapter-14-the-hornblower-factor)
- [Chapter 15: Let's talk about Leadership](#chapter-15-lets-talk-about-leadership)
- [Chapter 16: Hiring a Juggler](#chapter-16-hiring-a-juggler)
- [Chapter 17: Playing well with others](#chapter-17-playing-well-with-others)
- [Chapter 18: Childhood's end](#chapter-18-childhoods-end)
- [Chapter 19: Happy to be here](#chapter-19-happy-to-be-here)
- [Chapter 20: Human Capital](#chapter-20-human-capital)
- [Chapter 21: The Whole is greater than the sum of the Parts](#chapter-21-the-whole-is-greater-than-the-sum-of-the-parts)
- [Chapter 22: The Black Team](#chapter-22-the-black-team)
- [Chapter 23: Teamicide](#chapter-23-teamicide)
- [Chapter 24: Teamicide Revisited](#chapter-24-teamicide-revisited)
- [Chapter 25: Competition](#chapter-25-competition)
- [Chapter 26: A spaghetti dinner](#chapter-26-a-spaghetti-dinner)
- [Chapter 27: Open Kimono](#chapter-27-open-kimono)
- [Chapter 28: Chemistry for Team Formation](#chapter-28-chemistry-for-team-formation)
- [Chapter 29: The self-healing system](#chapter-29-the-self-healing-system)
- [Chapter 30: Dancing with Risk](#chapter-30-dancing-with-risk)
- [Chapter 31: Meetings, Monologues, and Conversations](#chapter-31-meetings-monologues-and-conversations)
- [Chapter 32: The ultimate management sin is ...](#chapter-32-the-ultimate-management-sin-is-)
- [Chapter 33: E(vil) Mail](#chapter-33-evil-mail)
- [Chapter 34: Making change possible](#chapter-34-making-change-possible)
- [Chapter 35: Organizational learning](#chapter-35-organizational-learning)
- [Chapter 36: The making of community](#chapter-36-the-making-of-community)
- [Chapter 37: Chaos and Order](#chapter-37-chaos-and-order)
- [Chapter 38: Free Electrons](#chapter-38-free-electrons)
- [Chapter 39: Holgar Dansk](#chapter-39)

## Chapter 1: Somewhere today, a project is failing

"Politics" is the most frequently cited cause of failure. By "politics" people mean: communication problems, staffing problems, lack of motivation, and high turnover. The English language provides a much more precise term - sociology.

> The major problems of our work are not so much technological as sociological in nature.

We tend to focus on the technical rather than the human side of the work, because it is easier to do: installing a new hard drive vs figuring out why somebody is dissatisfied with the company. A manager should concentrate on sociology, not on technology. Human interactions are complicated and never very crisp and clean in their effects, but they matter more than any other aspect of the work.

## Chapter 2: Make a cheeseburger, sell a cheeseburger

The "make a cheeseburger, sell a cheeseburger" mentality can be fatal in your development area:

- Make the machine (the human machine) run as smoothly as possible
- Take a hard line about people goofing off on the job
- Treat workers as interchangeable pieces of the machine
- Optimize the steady state
- Standardize procedure, do everything by the book
- Eliminate experimentation - that's what the folks at headquarters are paid for

To manage thinking workers effectively, managers should take measures nearly opposite to those listed above:

- encourage people to make some errors
- ask people what dead-end roads they have been down, and make sure they understand that "none" is not the best answer
- you may be able to kick people to make them active, but not to make them creative, inventive and thoughtful; there is nothing more discouraging to any worker than the sense that his own motivation is inadequate and has to be "supplemented" by the boss
- every worker is unique; uniqueness is what makes project chemistry vital and effective
- the catalyst is important because the project is always in a state of flux; someone who can help a project to jell is worth two people who just do work
- managers pay too little attention to how well each team member fits into the effort as a whole
- workers need time for brainstorming, investigating new methods, figuring out how to avoid doing some subtasks, reading, training, and just goofing off

## Chapter 3: Vienna waits for you

Your people are aware of the one short life each person is allotted. There has to be something more important than the silly job they are working on.

Overtime - for every hour of overtime, there will be more or less an hour of undertime. The trade-off might work to management's advantage in the short term, but in the long term it will cancel out. Overtime is like sprinting: it makes some sense for the last 100m of the marathon for those with any energy left, but if you start sprinting in the first kilometer, you are wasting time.

Workaholism is an illness, but a common-cold-like one: everyone has a bout of it now and then. If a manager tries to exploit workaholics, they will eventually lose them.
The realization that one has sacrificed a more important value (family, love, home, youth) for a less important value (work) is devastating.

Typical things companies do to "improve productivity" make the work less enjoyable and less interesting:

- pressure people to put in more hours
- mechanize the process of product deployment
- compromise the quality of the product
- standardize procedures

Next time you hear someone talking about productivity, listen carefully to hear if the speaker uses the term "employee turnover". Chances are low.

> People under time pressure don't work better - they just work faster. In order to work faster, they may have to sacrifice the quality of the product and of their work experience.

## Chapter 4: Quality - if time permits

Man's character is dominated by a small number of basic instincts: survival, self-esteem, reproduction, territory, ... Even the slightest challenge to one of these built-in values can be upsetting. We all tend to tie our self-esteem to the quality of the product we produce. Any step you take that may jeopardize the quality of the product is likely to set the emotions of your staff directly against you. Workers kept under extreme time pressure will begin to sacrifice quality. They will hate what they are doing, but what other choice do they have?

> Some of my folks would tinker forever on a task, all in the name of _QUALITY_. But the market doesn't give a damn
> about that much quality - it is screaming for the product to be delivered yesterday and will accept it even in a
> quick-and-dirty state

**WRONG.** The builders' view of quality is very different - their self-esteem is strongly tied to the quality of the product, and they tend to impose quality standards of their own. _Quality, far beyond that required by the end user, is a means to higher productivity._

In some Japanese companies, the project team has an effective power of veto over delivery of what they believe to be a not-yet-ready product. No matter that the client would be willing to accept even a substandard product, the team can insist that delivery wait until its own standards are achieved.

## Chapter 5: Parkinson's Law revisited

Parkinson's Law:

> Work expands to fill the time allocated for it

Parkinson's Law gives managers the strongest possible conviction that the only way to get work done at all is to set an impossibly optimistic delivery date. But Parkinson's Law almost certainly doesn't apply to your people. Treating people as Parkinsonian workers doesn't work - it can only demean and demotivate them. Parkinson's Law didn't catch on because it was so true; it caught on because it was funny.

Programmers seem to be a bit more productive after they have done the estimate themselves, compared to cases in which the manager did it without even consulting them. According to the 1985 Jeffery-Lawrence study, projects on which the boss applied no schedule pressure whatsoever ("Just wake me up when you are done") had the highest productivity of all.

## Chapter 6: Laetrile

Laetrile - a colorless liquid pressed from apricot pits. It can be used for baking like any extract; in Mexico you can buy it for $50 to "cure" fatal cancer. Similarly, lots of managers fall into the trap of technical laetrile that purports to improve productivity.

The 7 false hopes of software management:

1. There is some new trick you have missed that could send productivity soaring
2. Other managers are getting gains of 100-200% or more
3. Technology is moving so swiftly that you are being passed by
4. Changing languages will give you huge gains
5. Because of the backlog, you need to double productivity immediately
6. You automate everything else: isn't it about time you automated away your software development staff?
7. Your people will work better if you put them under a lot of pressure

What management is:

> The manager's function is not to make people work, but to make it possible for people to work.

## Chapter 7: The Furniture Police

The work space given to intellect workers is usually noisy, interruptive, un-private and sterile. Some are prettier than others, but not much more functional. Police-mentality planners design workspaces the way they would design prisons - optimized for containment at minimal cost. As long as workers are crowded into noisy, sterile, disruptive space, it is not worth improving anything but the workspace.

## Chapter 8: You never get anything done around here between 9 and 5

To be productive, people may come in early or stay late or even try to escape entirely, by staying home for a day to get a critical piece of work done. Staying late, arriving early, or staying home to work in peace is a damning indictment of the office environment.

Two people from the same organization tend to perform alike. The best performers cluster in some organizations while the worst performers cluster in others. Many companies provide developers with a workplace so crowded, noisy, and interruptive as to fill their days with frustration. That alone could explain reduced efficiency as well as the tendency of good people to migrate elsewhere.

If you participate in or manage a team of people who need to use their brains during the workday, then the environment is your business.

## Chapter 9: Saving money on space

It is surprising how little the potential savings are compared to the potential risk. The entire cost of workspace for a developer is a small percentage of the salary paid to the developer - a 20:1 ratio. People need space and quiet in order to perform optimally. Noise is directly proportional to density, so halving the allotment of space per person can be expected to double the noise. Saving money on space may be costing you a fortune.

## Chapter 10: Brain Time versus Body Time

In the office: 30% of the time, people are noise sensitive, and the rest of the time, they are noise generators. Each time you are interrupted, you require an additional immersion period to get back into flow. During this immersion, you are not really doing work.

People have to be reassured that it is not their fault if they can only manage one or two uninterrupted hours a week - rather it is the organization's fault for not providing a flow-conducive environment. None of this data can go to the Payroll Department. The collection of uninterrupted-hour data can give you some meaningful metric evidence of just how good or bad your environment is.

```
E-Factor = Uninterrupted Hours / Body-Present Hours
```

## Chapter 11: The Telephone

When you are doing think-intensive work like design, interruptions are productivity killers. When you are doing sales and marketing support, you have to take every single call that comes in. Mixing flow and highly interruptive activities is a recipe for nothing but frustration. A "Leave me alone, I am working" ethic can emerge. People must learn that it is okay sometimes not to answer the phone, and their managers need to understand that as well. That is the character of knowledge workers' work: the quality of their time is important, not just its quantity.
## Chapter 12: Bring Back the Door

There are some prevalent symbols of success and failure in creating a sensible workplace. The most obvious symbol of success is the door. When there are sufficient doors, workers can control noise and interruptibility to suit their changing needs.

Don't expect the Establishment to roll over and play dead just because you begin to complain. There are at least 3 counterarguments that surface almost immediately:

- "People don't care about glitzy office space. They are too intelligent for that. And the ones who do care are just playing status games."
    - Appearance is stressed far too much in workplace design. What is more relevant is whether the workplace lets you work or inhibits you.
- "Maybe noise is a problem, but there are cheaper ways to deal with it than mucking around with the physical layout. We could just pipe in white noise or Muzak and cover up the disturbance."
    - You can either treat the symptom or treat the cause. Treating the cause means choosing isolation in the form of noise barriers - walls and doors - and these cost money. Treating the symptom is much cheaper: when you install Muzak or some other form of pink noise, you can save even more money by ignoring the problem.
- "Enclosed offices don't make for a vital environment. We want people to interact productively, and that is what they want, too. So walls and doors would be a step in the wrong direction."
    - Enclosed offices don't have to be one-person offices. A 2, 3, or 4-person office makes a lot more sense.

Management, at its best, should make sure there is enough space, enough quiet, and enough ways to ensure privacy so that people can create their own sensible work space.

## Chapter 13: Taking Umbrella Steps

> People cannot work effectively if their workspace is too enclosed or too exposed. A good workspace strikes the
> balance. You feel more comfortable in a workspace if there is a wall behind you. There should be no blank wall closer
> than 2.5m in front of you (eye relief). You should not be able to hear noises very different from the kind you make,
> from your workspace. Workspaces should allow you to face in different directions.

> Rooms without a view are like prisons for the people who have to stay in them ~ Christopher Alexander, _A Pattern Language_

## Chapter 14: The Hornblower Factor

Hornblower is the ultimate manager - his career advanced from midshipman to admiral through the same blend of cleverness, daring, political maneuvering and good luck. Managers are supposed to use their leadership skills to bring out untapped qualities in each subordinate - but this is not realistic. The manager doesn't have enough leverage to make a difference in a person's nature. So the people who work for you through whatever period will be more or less the same at the end as they were at the beginning. If they are not right for the job from the start, they never will be. Getting the right people in the first place is all-important.

Most hiring mistakes result from too much attention to appearance. Evolution has planted in each of us a certain uneasiness toward people who differ very much from the norm. The need for uniformity is a sign of insecurity on the part of management. Strong managers don't care when team members cut their hair or whether they wear ties. Their pride is tied only to their staff's accomplishments.

Companies sometimes impose standards of dress; these remove considerable discretion from the individual.
The effect is devastating - people can talk and think of nothing else, all useful work stops dead. The most valuable people begin to realize that they aren't appreciated for their contributions, but for haircuts and neckties.

The term _unprofessional_ is often used to characterize surprising and threatening behaviour. Anything that upsets the weak manager is almost by definition unprofessional - long hair on a man's head, comfortable shoes, dancing around the desk, laughing, ...

The second thermodynamic law of management: _Entropy is always increasing in the org._ That's why most elderly institutions are tighter and a lot less fun than sprightly young companies.

## Chapter 15: Let's talk about Leadership

One of the most dreadful "motivational" posters says: "The speed of the leader sets the rate of the pack." This is a work-extraction mechanism whose purpose is to increase quantity, not quality - work harder, stay longer, stop goofing off. Leadership is not about extracting anything from somebody - it is about service. While leaders sometimes set explicit directions, their main role is that of a catalyst, not a director.

Rebellious leadership is important in order to innovate - leaders should supply time to innovate (take a person away from doing billable work). Nobody knows enough to give permission to the key innovators to do what needs to be done. That's why leadership as a service almost always operates without official permission.

> If companies were more inclined to let leadership arise naturally, they wouldn't need to produce so much hot
> air talking about it.

## Chapter 16: Hiring a Juggler

If you are hiring a person to produce, you need to examine a sample of those products to see the quality of the candidate's work. Otherwise, the interview is just a talk. Candidates can show off a portfolio as part of the interview.

Aptitude tests are almost always oriented toward the tasks the person will perform immediately after being hired, and they are left-brain oriented. An aptitude test may give you people who perform better in the short term, but who are less likely to succeed later on. Use them, but not for hiring. The hiring process needs to focus on at least some sociological and human communication traits. Ask a candidate to prepare a 10-15 minute presentation on some aspect of past work (technology, management, a project) - you will be able to see the candidate's communication skills.

## Chapter 17: Playing well with others

The capacity of a team to absorb newness has its limits. Team jell takes time, and, during much of that time, the composition of the team can't be changing. If you need to use a reactive labor strategy, your team will probably never jell. In fact, the work group you manage almost certainly will not be a team at all.

## Chapter 18: Childhood's end

For the youngest employees, computers, smartphones, the Web, programming, hacking, social networking, and blogging are environment, not technology. Young people divide their attention, while their older colleagues tend to focus on one or possibly two tasks at once. Continuous partial attention is the opposite of flow. There is a difference between spending 2% of time on Facebook in a single block of time vs spending 2% of attention all day on Facebook. Articulating requirements to young workers is going to be essential to give them a chance to fit in.
## Chapter 19: Happy to be here

Typical turnover figures are in the range of 80% to 33% per year => average employee longevity between 15 and 36 months. The average person leaves after a little more than two years. It costs 1.5-2 months' salary to hire a new employee (agency or in-house HR). A new employee is quite useless on Day Zero (or even less than useless); after a few months the new person is doing some useful work. Within 5 months, he/she is at full working capacity. The total cost of replacing each person is the equivalent of 4.5-5 months of employee cost, or about 20% of the cost of keeping that employee for the full 2 years on the job. And that is only the visible cost of turnover.

In companies with high turnover, people tend toward a destructively short-term viewpoint, because they know they just aren't going to be there very long. In an organization with high turnover, nobody is willing to take the long view. If people only stick around for a year or two, the only way to conserve the best people is to promote them quickly. From the corporate perspective, late promotion is a sign of health.

The reasons that account for most departures:

- A just-passing-through mentality - no feelings of long-term involvement in the job
- A feeling of disposability - workers as interchangeable parts (since turnover is really high, nobody is indispensable)
- A sense that loyalty would be ludicrous - who would be loyal to an org that views its people as parts

People leave quickly -> no spending money on training -> no investment in the individual -> the individual thinks of moving on.

The best companies are consciously striving to be best. People tend to stay at such companies because there is a widespread sense that you are expected to stay. A common feature of companies with the lowest turnover is widespread retraining (you are forever bumping into managers and officers who started out as secretaries, payroll clerks, or in the mailroom).

## Chapter 20: Human Capital

Companies that manage their investment sensibly will prosper in the long run. Companies of knowledge workers have to realize that it is their investment in human capital that matters most. The good ones already do.

## Chapter 21: The Whole is greater than the sum of the Parts

Jelled Team - a group of people so strongly knit that the whole is greater than the sum of the parts. The production of such a team is greater than that of the same people working unjelled. Once a team begins to jell, the probability of success goes up dramatically. They don't need to be managed in the traditional sense, and they certainly don't need to be motivated. They have got momentum. Believing that workers will automatically accept organizational goals is a sign of naive managerial optimism.

> The purpose of a team is not goal attainment but goal alignment

Signs of a jelled team:

- low turnover
- strong sense of identity (colourful name)
- sense of eliteness (part of something unique; this attitude might be annoying to people outside the group)
- joint ownership

## Chapter 22: The Black Team

The story about the legendary, jelled team - The Black Team.

## Chapter 23: Teamicide

You can't control jelling - the process is too fragile to be controlled. The exact steps are hard to describe; the opposite is easier.
Teamicide techniques:

- Defensive management - let your people make mistakes; do not send a message that making errors is forbidden. "My people are too dumb to build systems without me" - people who feel untrusted have little inclination to bond together into a cooperative team.
- Bureaucracy - mindless paper pushing hurts team formation.
- Physical separation - group members may grow stronger bonds to non-group neighbours, just because they see more of them. Putting people together gives them the opportunity for the casual interaction that is so necessary for team formation.
- Fragmentation of people's time - bad for team formation and efficiency; no one can be part of multiple jelled teams.
- Quality reduction of the product - typical scenario: deliver a product in less time = lower quality. Self-esteem and enjoyment are undermined by the necessity of building a product of clearly lower quality than what the team is capable of.
- Phony deadlines - the date mentioned is impossible to meet, and everyone knows it; a team will not jell in such an environment.
- Clique control - there are no jelled teams at the managerial level; as you go higher and higher in the organization chart, the concept of jelled teams recedes further into oblivion.

## Chapter 24: Teamicide Revisited

Two additional kinds of teamicide:

- motivational posters - phony enough to make most people's skin crawl
- overtime - errors, burnout, accelerated turnover, and compensatory undertime; it disrupts the team

## Chapter 25: Competition

Coaching is an important factor in successful team interaction. It provides coordination and personal growth, and it feels good. We feel a huge debt to those who have coached us in the past. The act of coaching cannot take place if people don't feel safe. In a competitive atmosphere, you would be crazy to let anyone see you sitting down to be coached. You would be similarly crazy to coach someone else, as that person may eventually use your assistance to pass you by. Anything the manager does to increase the competition within a team has to be viewed as teamicidal.

## Chapter 26: A spaghetti dinner

Good managers provide frequent easy opportunities for the team to succeed together. The opportunities may be tiny pilot subprojects, or demonstrations, or simulations - anything that gets the team quickly into the habit of succeeding together.

## Chapter 27: Open Kimono

The Open Kimono attitude is the opposite of defensive management. You take no steps to defend yourself from the people you have put into positions of trust. A person you can't trust with any autonomy is of no use to you. If you have got decent people under you, there is probably nothing you can do to improve their chances of success more dramatically than to get yourself out of their hair occasionally. Visual supervision is for prisoners.

## Chapter 28: Chemistry for Team Formation

Some organizations are famous for their consistent good luck in getting well-knit teams to happen. It isn't luck - it's chemistry. These organizations are just plain healthy.
Signs of a healthy organization:

- people at ease
- people having a good time
- people enjoying interactions with their peers
- no defensiveness - the work is a joint product, everybody is proud of the quality
- managers devote their energy to building and maintaining healthy chemistry

Chemistry-building strategy:

- Make a cult of quality - a cult of quality is the strongest catalyst for team formation
- Provide lots of satisfying closure - people need reassurance from time to time that they are headed in the right direction
- Build a sense of eliteness - people require a sense of uniqueness to be at peace with themselves, and they need to be at peace with themselves to let the jelling process begin
- Allow and encourage heterogeneity - diverse teams are more fun to work in
- Preserve and protect successful teams
- Provide strategic but not tactical direction

Managers are usually not part of the teams that they manage. On the best teams, different individuals provide occasional leadership, taking charge in areas where they have particular strengths.

## Chapter 29: The self-healing system

A Methodology - a general theory of how a whole class of thought-intensive work ought to be conducted. _The people who write the Methodology are smart. The people who carry it out can be dumb._

There is a big difference between Methodology and methodology - a methodology is the basic approach one takes to get a job done. It doesn't reside in a fat book, but rather inside the heads of the people carrying out the work. Big-M Methodology is an attempt to centralize thinking. All meaningful decisions are made by the Methodology builders, not by the staff assigned to do the work.

> Voluminous documentation is part of the problem, not part of the solution. People should focus on getting things done,
> instead of building documents.

People might actually do exactly what the Methodology says, and the work would grind nearly to a halt.

## Chapter 30: Dancing with Risk

Our main problems are more likely to be sociological than technological in nature. Projects that have real value but little or no risk were all done ages ago. The ones that matter today are laden with risk. Risk management is not about making the risk go away, but about enabling sensible mitigation - planned and provisioned well ahead of time.

## Chapter 31: Meetings, Monologues, and Conversations

Some orgs are addicted to meetings; at the other extreme, some orgs refuse to use the "M" word at all. As orgs age, meeting time increases until there is time for nothing else. Even short stand-ups can be a drag on an organization's effectiveness if they lack purpose and focus. To cure a meeting-addicted org, start small: eliminate most ceremonial meetings in your area, spend time in one-on-one conversations, and limit attendance at working meetings. Encourage Open-Space networking to give people the chance to have unstructured interaction.

## Chapter 32: The ultimate management sin is ...

... wasting people's time. When participants of a meeting take turns interacting with one key figure, the expected rationale for assembling the whole group is missing - the boss might as well have interacted separately with each of the subordinates. Fragmented time is almost certainly teamicidal, and it is also guaranteed to waste the individual's time. The human capital invested in your workforce represents a ton of money.

## Chapter 33: E(vil) Mail

When you over-coordinate the people who work for you, they are too likely to under-coordinate their own efforts.
But self-coordination and mutual coordination among peers is the hallmark of graceful teamwork. Imagine how it would work if every pass could only happen if and when the coach gave the signal from the sideline. A decent coach understands that his/her job is to help people learn to self-coordinate.

> Life is short. If you need to know everything in order to do anything, you are not going to get much done.

## Chapter 34: Making change possible

> People hate change, and that is because people hate change.

People really hate change, they really, really do. When we start out to change, it is never certain that we will succeed. The uncertainty is more compelling than the possible gain.

> The fundamental response to change is not logical, but emotional

**You can never improve if you can't change at all.**

Change involves at least 4 stages: Old Status Quo -> Chaos -> Practice and Integration -> New Status Quo. Change happens upon introduction of a foreign element: a catalyst for change. Without a catalyst, there is no recognition of the desirability of change. Change won't even get started unless people feel safe - and people feel safe when they know they will not be demeaned for proposing a change. Change has a chance of succeeding only if failure is also okay.

## Chapter 35: Organizational learning

Learning is a critical improvement mechanism - an organization that doesn't learn cannot expect to prosper for very long. Experience gets turned into learning when an organization alters itself to take account of what experience has shown.

> Learning is limited by an organization's ability to keep its people

When turnover is high, learning is unlikely to stick or can't take place at all. In such an organization, attempts to change skills or to introduce redesigned procedures are an exercise in futility.

## Chapter 36: The making of community

What do great managers do best? The making of community. A need for community is built right into the human firmware. Community doesn't just happen on the job. It has to be made. The people who make it are the unsung heroes of our work experience.

An org that succeeds in building a satisfying community tends to keep its people. No one wants to leave. The investment made in human capital is retained, and upper management is willing to invest more. When the company invests more in its people, the people perform better and feel better about themselves and about their company. There is no formula for building community in the workplace. Some experimenting is needed.

## Chapter 37: Chaos and Order

There is something about human nature that makes us the implacable enemies of chaos. People who were attracted by the early disorder feel a nostalgic fondness for the days when everything wasn't so awfully mechanical. Some lost disorder can be reintroduced to breathe some energy into the work - a policy of constructive reintroduction of small amounts of disorder:

1. Pilot projects - set the fat book of standards aside and try some new, unproved technique - people get a boost in energy when they are doing something new and different
2. War games - war games help you evaluate your relative strengths and weaknesses and help the organization observe its global strengths and weaknesses - a big fuss should be made over any and all accomplishments
3. Brainstorming - an interactive session targeted on creative insight - focus on quantity of ideas, not quality; keep the proceedings loose, even silly; discourage negative comments
4. Provocative training experiences
5. Training, trips, conferences, celebrations, and retreats - everybody relishes a chance to get out of the office - when a team is forming, it makes good business sense to fight for travel money to get team members out of the office together - adventure adds small amounts of constructive disorder

## Chapter 38: Free Electrons

Free electrons - workers having a strong role in choosing their own orbits: positions with loosely stated responsibilities, so that the individual has a strong say in defining the work. Companies profit from such people. Some individuals need to be left alone to work out some matters, or at least be free to seek guidance if and when and from whomever he or she chooses. The mark of the best manager is an ability to single out the few key spirits who have the proper mix of perspective and maturity, and then turn them loose.

## Chapter 39: Holgar Dansk

A single person acting alone is not likely to effect any meaningful change. But there is no need to act alone. When something is terribly out of kilter, it takes very little to raise people's consciousness of it. Then it is no longer you. It is everyone. It may be a small voice saying: "This is unacceptable" - people know it is true. Once it has been said out loud, they can't ignore it any longer.

Sociology matters more than technology or even money. Work is supposed to be productive, satisfying fun. If it isn't, there is nothing else worth concentrating on. Choose your terrain carefully, assemble your facts, and speak up. You can make a difference.

================================================
FILE: books/pragmatic-programmer.md
================================================

[go back](https://github.com/pkardas/learning)

# The Pragmatic Programmer: journey to mastery, 20th Anniversary Edition

Book by David Thomas and Andrew Hunt

- [Chapter 1: A Pragmatic Philosophy](#chapter-1-a-pragmatic-philosophy)
- [Chapter 2: A Pragmatic Approach](#chapter-2-a-pragmatic-approach)
- [Chapter 3: The Basic Tools](#chapter-3-the-basic-tools)
- [Chapter 4: Pragmatic Paranoia](#chapter-4-pragmatic-paranoia)
- [Chapter 5: Bend, or Break](#chapter-5-bend-or-break)
- [Chapter 6: Concurrency](#chapter-6-concurrency)
- [Chapter 7: While you are coding](#chapter-7-while-you-are-coding)
- [Chapter 8: Before the Project](#chapter-8-before-the-project)
- [Chapter 9: Pragmatic Projects](#chapter-9-pragmatic-projects)
- [Postface](#postface)

## Chapter 1: A Pragmatic Philosophy

**You Have Agency.** It is your life. You own it. You run it. You create it. This industry gives you a remarkable set of opportunities. Be proactive, and take them.

The team needs to be able to trust you and rely on you, and you need to be comfortable relying on each of them as well. In a healthy environment based on trust, you can safely speak your mind, present your ideas, and rely on your team members, who can in turn rely on you.

**Provide options, don't make lame excuses.** Instead of excuses, provide options. Don't say it can't be done: explain what can be done to salvage the situation. When you find yourself saying "_I don't know_", be sure to follow it up with "_-- but I'll find out_". It is a great way to admit what you don't know, but then take responsibility like a pro.

_Entropy_ - a term from physics that refers to the amount of "disorder" in a system. The entropy in the universe tends toward a maximum. When disorder increases in software, we call it "software rot".
Some folks might call it by the more optimistic term "_technical debt_" (with the implied notion that they will pay it back someday; they probably will not).

**Don't live with broken windows.** Bad designs, wrong decisions, or poor code. Fix each one as soon as it is discovered. If there is not sufficient time to fix it properly, board it up. Take some action to prevent further damage and to show that you are on top of the situation. Don't let entropy win. If you find yourself working on a project with quite a few broken windows, it is all too easy to slip into the mindset of "_All the rest of this code is crap, I will just follow suit._" By the same token, if you find yourself on a project where the code is beautiful, well-designed, and elegant, you will likely take extra special care not to mess it up.

Idea: Help strengthen your team by surveying your project neighbourhood. Choose two or three broken windows and discuss with your colleagues what the problems are and what could be done to fix them.

**Be a catalyst for change.** You may be in a situation where you know exactly what needs doing and how to do it. People will form committees, budgets will need approval, and things will get complicated. Work out what you can reasonably ask for. Develop it well. Once you have got it, show people, and let them marvel. Sit back and wait for them to start asking you to add the functionality you originally wanted. Show them a glimpse of the future, and you will get them to rally around.

**Remember the Big Picture.** Constantly review what is happening around you, not just what you personally are doing. Projects slowly and inexorably get totally out of hand. Most software disasters start out too small to notice, and most project overruns happen a day at a time. It is often the accumulation of small things that breaks morale and teams. Situational awareness (is there anything out of context, anything that looks like it doesn't belong?) is a technique practiced by folks ranging from Boy and Girl Scouts to Navy SEALs. Get in the habit of really looking at and noticing your surroundings.

**Make quality a requirements issue.** Involve your users in determining the project's real quality requirements.

> An investment in knowledge always pays the best interest ~ Benjamin Franklin

**Invest regularly in your knowledge portfolio.** Your knowledge and experience are your most important day-to-day professional assets. Knowledge may become out of date; as the value of your knowledge declines, so does your value to your company or client.

1. Invest regularly - invest in knowledge regularly, even in small amounts.
2. Diversify - the more different things you know, the more valuable you are.
3. Manage risk - don't put all your technical eggs in one basket.
4. Buy low, sell high - learning an emerging technology before it becomes popular can be just as hard as finding an undervalued stock, but the payoff can be just as rewarding.
5. Review and rebalance - that hot technology you started investing in last month might be stone-cold by now.
Goals:

- learn at least one programming language per year - by learning several approaches, you can broaden your thinking
- read a technical book each month
- read nontechnical books too - don't forget the human side of the equation, as that requires an entirely different skill set
- take classes - look for interesting courses at a local or online college
- participate in local user groups and meetups - isolation can be deadly to your career, find out what people are working on outside of your company
- experiment with different environments - try Linux, Windows, Mac, a new IDE, ...
- stay current - read news and posts online on technology different from that of your current project

**Critically analyze what you read and hear.** You need to ensure that the knowledge in your portfolio is accurate and unswayed by either vendor or media hype.

_Critical Thinking Tutorial:_

1. Ask the "Five Whys" - ask why at least 5 times. Ask a question and get an answer. Dig deeper by asking "why".
2. Who does this benefit? - "follow the money" can be a very helpful path to analyze. The benefits to someone else or another organization may be aligned with your own, or not.
3. What is the context? - everything occurs in its own context. Just because something is good for someone doesn't mean it is good for you.
4. Why is this a problem? - is there an underlying model? How does the underlying model work?

**English is just another programming language.** Having the best ideas, the finest code, or the most pragmatic thinking is ultimately sterile unless you can communicate with other people.

**It is both what you say and the way you say it.** There is no point in having great ideas if you don't communicate them effectively. The more effective your communication, the more influential you become.

**Build documentation in, don't bolt it on.** It is easy to produce good-looking documentation from the comments in source code, and we recommend adding comments to modules and exported functions to give other developers a leg up when they come to use them. Restrict your non-API commenting to discussing why something is done, its purpose and its goal. The code already shows how it is done, so commenting on this is redundant - and a violation of the DRY principle.

## Chapter 2: A Pragmatic Approach

**Good design is easier to change than bad design.** A thing is well-designed if it adapts to the people who use it. Code should be Easy To Change. That's why SRP, decoupling, naming, ... are important - because of ETC.

**DRY - Don't Repeat Yourself.** Every piece of knowledge must have a single, unambiguous, authoritative representation within a system.

Most people believe that maintenance begins when an application is released, and that maintenance means fixing bugs and enhancing features. This is wrong. Programmers are constantly in maintenance mode. Maintenance is not a discrete activity, but a routine part of the entire development process. When we perform maintenance, we have to find and change the representation of things. It is easy to duplicate knowledge in the specifications, processes, and programs we develop, and when we do so, we invite a maintenance nightmare.

DRY is about the duplication of knowledge, of intent. It is about expressing the same thing in two different places, possibly in two totally different ways. Two pieces of code may be the same, but the knowledge they represent may be different - that is not duplication, that is a coincidence.
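A minimal Python sketch of that distinction (the two validators are hypothetical, not the book's example): the bodies are identical, but each encodes a different business rule, so merging them would couple two pieces of knowledge that can change independently.

```python
def validate_age(age: int) -> None:
    # Knowledge encoded here: what a legal age value is.
    if age < 0:
        raise ValueError("age must be non-negative")


def validate_quantity(quantity: int) -> None:
    # Knowledge encoded here: what a legal order quantity is.
    if quantity < 0:
        raise ValueError("quantity must be non-negative")
```

If the age rule later changes (say, to `0 <= age <= 130`), only `validate_age` needs to change - deduplicating the two up front would have been false DRY.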
> All services offered by a module should be visible through a uniform notation, which does not betray whether they are
> implemented through storage or through computation.

**Make it easy to reuse.** You should foster an environment where it is easier to find and reuse existing stuff than to write it yourself. If it isn't easy, people will not do it. And if you fail to reuse, you risk duplicating knowledge.

Two or more things are orthogonal if changes in one do not affect any of the others. In a well-designed system, the database code will be orthogonal to the user interface - you can change the interface without affecting the database, and swap databases without changing the interface. Non-orthogonal systems are more complex to change and control.

**Eliminate effects between unrelated things.** We want to design components that are self-contained - independent, and with a single, well-defined purpose. When components are well isolated from one another, you know that you can change one without having to worry about the rest. As long as you don't change that component's external interfaces, you can be confident that you will not cause problems that ripple through the entire system. Modular, component-based, layered systems - these are orthogonal systems.

- Keep your code decoupled - write shy modules: modules that don't reveal anything unnecessary to other modules and that don't rely on other modules' implementations. If you need to change an object's state, get the other object to do it for you.
- Avoid global data - in general, your code is easier to understand and maintain if you explicitly pass any required context into your modules.
- Avoid similar functions - duplicate code is a symptom of structural problems.

**There are no final decisions.** The mistake lies in assuming that any decision is cast in stone - and in not preparing for the contingencies that might arise. Think of decisions as being written in the sand at the beach. A big wave can come along and wipe them out at any time.

**Forgo following fads.** Choose architecture based on fundamentals, not fashion. No one knows what the future may hold.

**Use tracer bullets to find the target.** Look for the important requirements, the ones that define the system. Look for the areas where you have doubts, and where you see the biggest risks. Then prioritize your development so that these are the first areas you code. Benefits of tracer code:

- Users get to see something working early.
- Developers build a structure to work in.
- You have an integration platform.
- You have something to demonstrate.
- You have a better feel for progress.

Prototyping generates disposable code. Tracer code is lean but complete, and forms part of the skeleton of the final system. Think of prototyping as the reconnaissance and intelligence gathering that takes place before a single tracer bullet is fired.

Prototypes are designed to answer just a few questions, so they are much cheaper and faster to develop than applications that go into production. You can prototype: architecture, new functionality in an existing system, structure or contents of external data, third-party tools or components, performance issues, user interface design.

**Prototype to learn.** Prototyping is a learning experience. Its value lies not in the code produced, but in the lessons learned. That's really the point of prototyping. It is easy to be misled by the apparent completeness of a demonstrated prototype, and project sponsors or management may insist on deploying the prototype.
Remind them that you can build a great prototype of a new car out of balsa wood and duct tape, but you wouldn't try to drive it in rush-hour traffic. If you feel there is a strong possibility in your environment or culture that the purpose of prototype code may be misinterpreted, you may be better off with the tracer bullet approach.

**Program close to the problem domain.** Try to write code using the vocabulary of the application domain.

**Estimate to avoid surprises.** Estimate before you start. You will spot potential problems up front. A basic estimating trick: ask someone who's already done it. Before you get too committed to model building, cast around for someone who has been in a similar situation in the past. See how their problems got resolved.

Model building can be both creative and useful in the long term. Often, the process of building the model leads to discoveries of underlying patterns and processes that weren't apparent on the surface. On the other hand, model building also introduces inaccuracies into the estimating process.

_PERT - Program Evaluation Review Technique_ - an estimating methodology; every PERT task has an optimistic, a most likely, and a pessimistic estimate. Using a range of values like this is a great way to avoid one of the most common causes of estimation error - padding a number because you are unsure.

**Iterate the schedule with the code.** Make management understand that the team, their productivity, and the environment will determine the schedule. By formalizing this, and refining the schedule as part of each iteration, you will be giving them the most accurate scheduling estimates you can.

## Chapter 3: The Basic Tools

Tools amplify your talent. The better your tools, and the better you know how to use them, the more productive you can be.

**Keep knowledge in plain text.** Text will not become obsolete. Make plain text understandable to humans.

**Always use version control.** Make sure that everything is under version control: documentation, phone number lists, memos to vendors, makefiles, build and release procedures - everything.

**Fix the problem, not the blame.** It doesn't really matter whether the bug is your fault or someone else's.

**Don't panic.** The first rule of debugging. Don't waste a single neuron on the train of thought that begins "but that can't happen", because clearly it can, and has.

**Failing test before fixing code.** We want a bug that can be reproduced with a single command. It is a lot harder to fix a bug if you have to go through 15 steps to get to the point where the bug shows up.

**Read the damn error message.** Most exceptions tell both what failed and where it failed.

Binary search can be used for finding the release that introduced the error, or for determining the minimal subset of values that causes the program to fail.

**Select isn't broken.** It is possible that a bug exists in the OS, the compiler, or a third-party product - but this should not be your first thought. It is much more likely that the bug exists in the application code under development.

**Don't assume it - prove it.** Don't gloss over a routine or piece of code involved in the bug because you "know" it works. Prove it. Prove it in this context, with this data, with these boundary conditions.

## Chapter 4: Pragmatic Paranoia

**You can't write perfect software.** Perfect software doesn't exist. Pragmatic Programmers don't trust themselves. Knowing that no one writes perfect code, including themselves, they build in defenses against their own mistakes.
**Design with contracts.** Be strict in what you will accept before you begin, and promise as little as possible in return. Remember, if your contract indicates that you will accept anything and promise the world in return, you have got a lot of code to write.

**Crash early.** Don't catch or rescue all exceptions, re-raising them after writing some kind of message. Do not let error handling eclipse the code. Without exception-handling code, the code is less coupled. Crashing is often the best thing you can do. The Erlang and Elixir languages embrace this philosophy. When your code discovers that something that was supposed to be impossible just happened, your program is no longer viable. Anything it does from this point forward becomes suspect, so terminate it as soon as possible.

**Use assertions to prevent the impossible.** Whenever you find yourself thinking "but of course that could never happen", add code to check it. Assertions are also useful checks on an algorithm's operation. Assertions check for things that should never happen. LEAVE ASSERTIONS TURNED ON.

**Finish what you start.** The function or object that allocates a resource should be responsible for deallocating it.

**Take small steps - always.** Always take small, deliberate steps, checking for feedback and adjusting before proceeding. Consider that the rate of feedback is your speed limit. Never take on a step or a task that is "too big". The more you have to predict what the future will look like, the more risk you incur that you will be wrong. Instead of wasting effort designing for an uncertain future, you can always fall back on designing your code to be replaceable. Making code replaceable will also help with cohesion, coupling, and DRY, leading to a better design overall.

## Chapter 5: Bend, or Break

Decoupling shows how to keep separate concepts separate, decreasing coupling. Coupling is the enemy of change, because it links together things that must change in parallel. When you are designing bridges, you want them to hold their shape - you need them to be rigid. But when you are designing software that you will want to change, you want exactly the opposite - you want it to be flexible.

**Decoupled code is easier to change.**

**Tell, don't ask.** (The Law of Demeter) You shouldn't make decisions based on the internal state of an object and then update that object. Doing so totally destroys the benefits of encapsulation and, in doing so, spreads knowledge of the implementation throughout the code. A method defined in a class C should only call:

- other instance methods
- its parameters
- methods in objects it creates
- global variables

**Don't chain method calls.** (Something simpler than the Law of Demeter.) Try not to have more than one "." when you access something. The rule doesn't apply if the things you are chaining are really unlikely to change (e.g. libraries that come with the language).

**Avoid global data.** It is like adding an extra parameter to every method.

**If it is important enough to be global, wrap it in an API.** Any mutable external resource is global data (database, file system, service API, ...). Always wrap these resources behind code that you control.

Keeping your code shy - having it deal only with things it directly knows about - will help keep your applications decoupled, and that will make them more amenable to change.

Publish/Subscribe generalizes the observer pattern, at the same time solving its problems of coupling and performance.
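A minimal in-process sketch of the publish/subscribe idea (the class, topic, and handler names are illustrative assumptions, not the book's API):

```python
from collections import defaultdict
from typing import Any, Callable


class PubSub:
    """A tiny in-process publish/subscribe channel."""

    def __init__(self) -> None:
        self._handlers: dict[str, list[Callable[[Any], None]]] = defaultdict(list)

    def subscribe(self, topic: str, handler: Callable[[Any], None]) -> None:
        self._handlers[topic].append(handler)

    def publish(self, topic: str, payload: Any) -> None:
        # The publisher knows nothing about its subscribers.
        for handler in self._handlers[topic]:
            handler(payload)


bus = PubSub()
bus.subscribe("user.created", lambda email: print(f"send welcome mail to {email}"))
bus.publish("user.created", "alice@example.com")  # -> send welcome mail to alice@example.com
```

Because publisher and subscribers share only a topic name, either side can be replaced without touching the other - exactly the decoupling the chapter is after.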
Streams let us treat events as if they were a collection of data. It's as if we had a list of events, which gets longer as new events arrive. We can treat streams like any other collection (manipulate, filter, combine). A baseline for reactive event handling: reactivex.io

**Programming is about code, but programs are about data.** Start designing using transformations (Unix-like pipelines). Using pipelines means that you are automatically thinking in terms of transforming data.

**Don't hoard state, pass it around.** Functions greatly reduce coupling. A function can be used (and reused) anywhere its parameters match the output of some other function. There is still a degree of coupling, but it is more manageable than the OO style of command and control.

Thinking of code as a series of nested transformations can be a liberating approach to programming. It takes a while to get used to, but once you have developed the habit, you will find your code becomes cleaner, your functions shorter, and your designs flatter.

**Don't pay inheritance tax.** Inheritance is coupling. Not only is the child class coupled to the parent, the parent's parent, and so on, but the code that uses the child is also coupled to all the ancestors. Alternatives to inheritance:

- interfaces and protocols - these declarations create no code. We can use them to create types, and any class that implements the appropriate interface will be compatible with that type.
- delegation - has-a is better than is-a. If a parent has 20 methods and the subclass wants to make use of just 2 of them, its objects will still have the other 18 just lying around and callable.
- mixins and traits - use them to share functionality. The basic idea is simple: we want to be able to extend classes and objects with new functionality without using inheritance. So we create a set of these functions, give that set a name, and then somehow extend a class with them.

**Prefer interfaces to express polymorphism.** Interfaces and protocols give us polymorphism without inheritance.

**Parametrize your app using external configuration.** When code relies on values that may change after the application has gone live, keep those values external to the app. Keep environment- and customer-specific values outside the app (credentials, logging levels, IP addresses, validation parameters, external rates - e.g. tax rates, formatting details, license keys). While static configuration is common, the authors currently favor a different approach: keep configuration data external to the application, but rather than in a flat file or database, store it behind a service API.

## Chapter 6: Concurrency

Concurrency - when the execution of two or more pieces of code acts as if they run at the same time (context switching). Parallelism is when they do run at the same time (multiple cores).

Temporal coupling - coupling in time. Temporal coupling happens when your code imposes a sequence on things that is not required to solve the problem.

**Analyze workflow to improve concurrency.** Find out what can happen at the same time, and what must happen in a strict order. One way to do this is to capture the workflow using a notation such as the activity diagram.

**Shared state is incorrect state.** A semaphore is a thing that only one person can own at a time. You can create a semaphore and then use it to control access to some other resource.
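A small sketch of the semaphore idea using Python's standard library (the three-connection limit is a made-up example, not from the book):

```python
import threading

# At most 3 threads may "own" the scarce resource at any moment.
connection_slots = threading.Semaphore(3)


def worker(worker_id: int) -> None:
    with connection_slots:  # acquire on entry, release on exit
        print(f"worker {worker_id} is using one of the 3 connections")


threads = [threading.Thread(target=worker, args=(i,)) for i in range(10)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```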
**Random failures are often concurrency issues.** Whenever two or more instances of your code can access some resource at the same time, you are looking at a potential problem.

**Use actors for concurrency without shared state.** Actors execute concurrently and asynchronously, and share nothing. An actor is an independent virtual processor with its own local state. Each actor has a mailbox. When a message appears in the mailbox and the actor is idle, it kicks into life and processes the message. When it finishes processing, it processes another message from the mailbox, or goes back to sleep.

**Use blackboards to coordinate workflow.** The order of data arrival is irrelevant - when a fact is posted, it can trigger the appropriate rules. The output of any rule can post to the blackboard and cause the triggering of yet more applicable rules.

## Chapter 7: While You Are Coding

**Listen to your inner lizard.** When it feels like your code is pushing back, it is really your subconscious trying to tell you something is wrong. Learning to listen to your gut feeling when coding is an important skill to foster. But it applies to the bigger picture as well. Sometimes a design just feels wrong, or some requirement makes you feel uneasy. Stop and analyze these feelings. If you are in a supportive environment, express them out loud. Explore them.

**Don't program by coincidence.** Don't rely on luck and accidental success.

- Always be aware of what you are doing.
- Can you explain the code, in detail, to a more junior programmer? If not, perhaps you are relying on coincidences.
- Don't code in the dark. If you are not sure why it works, you will not know why it fails.
- Proceed from a plan.
- Don't depend on assumptions. If you can't tell whether something is reliable, assume the worst.
- Document your assumptions.
- Don't just test your code, test your assumptions as well. Don't guess, try it. Write an assertion to test your assumptions. If your assertion is right, you have improved the documentation in your code. If you discover your assumption is wrong, count yourself lucky.
- _Don't be a slave to history. Don't let existing code dictate future code. All code can be replaced if it is no longer appropriate._

**Estimate the order of your algorithms.** Estimate the resources that algorithms use - time, processor, memory, and so on. When you write anything containing loops or recursive calls, check the runtime and memory requirements. When a more detailed analysis is needed, use Big-O notation. Think of the _O_ as meaning _on the order of_. Big-O is never going to give you actual numbers for time or memory or whatever - it simply tells you how these values will change as the input changes. Common-sense estimation:

- simple loops - _O(n)_
- nested loops - _O(n^2)_
- binary chop - _O(log n)_
- divide and conquer - _O(n log n)_
- combinatorics - the running time may blow up, _O(n!)_

**Test your estimates.** The fastest algorithm is not always the best for the job. Given a small input set, a straightforward insertion sort will perform just as well as a quicksort, and will take less time to write and debug. Be wary of _premature optimisation_. It is always a good idea to make sure an algorithm really is a bottleneck before investing your precious time trying to improve it.

Refactoring: As a program evolves, it will become necessary to rethink earlier decisions and rework portions of the code. This process is perfectly natural. Code needs to evolve - it is not a static thing.
The most common metaphor for software development is building construction. But rather than construction, software is more like gardening - it is more organic than concrete. Refactoring is not intended to be a special, high-ceremony, once-in-a-while activity. Refactoring is a day-to-day activity, taking low-risk small steps. It is a targeted, precise approach to help keep the code easy to change. You need good, automated unit testing that validates the behavior of the code. Any number of things may cause code to qualify for refactoring:

- duplication
- non-orthogonal design - a change to one thing affects the other
- outdated knowledge
- usage - some features may turn out to be more important than originally thought
- performance
- the tests pass - when you have added a small amount of code and that extra test passes, you have a great opportunity to dive in and tidy up what you just wrote.

**Refactor early, refactor often.** Time pressure is often used as an excuse for not refactoring. Fail to refactor now, and there will be a far greater time investment to fix the problem down the road.

**Explain this principle to others by using a medical analogy: think of the code that needs refactoring as "a growth". Removing it requires invasive surgery. You can go in now, and take it out while it is still small. Or, you could wait while it grows and spreads - but removing it then will be both more expensive and more dangerous. Wait even longer, and you may lose the patient entirely.**

How to refactor without doing more harm than good:

1. Don't try to refactor and add functionality at the same time.
2. Make sure you have good tests before you begin refactoring. Run the tests as often as possible.
3. Take short, deliberate steps. Refactoring often involves making many localized changes that result in a larger-scale change.

Don't live with broken windows.

**Testing is not about finding bugs.** The major benefits of testing happen when you think about and write the tests, not when you run them.

**A test is the first user of your code.** Testing is vital feedback that guides your coding. _A function or method that is tightly coupled to other code is hard to test, because you have to set up all that environment._ Making your stuff testable also reduces its coupling.

**Build end-to-end, not top-down or bottom-up.** Build small pieces of end-to-end functionality, learning about the problem as you go. Like our hardware colleagues, we need to build testability into the software from the very beginning, and test each piece thoroughly before trying to wire the pieces together. Chip-level testing for hardware is roughly equivalent to unit testing in software. Write test cases that ensure a given unit honors its contract. We want to test that the module delivers the functionality it promises.

**Design to test.** Start thinking about testing before you write a line of code. Approaches:

- Test first - TDD - probably the best choice in most circumstances.
- Test during - a good fallback when TDD is not useful or convenient.
- Test never - the worst choice.

**Test your software, or your users will.** Make no mistake, testing is part of programming. It is not something left to other departments or staff. Testing, design, coding - it is all programming.

**Use property-based tests to validate your assumptions.** Property-based tests will try things you never thought to try, and exercise your code in ways it wasn't meant to be used. For Python, use the _Hypothesis_ framework.
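A minimal Hypothesis sketch (the properties checked here are an illustrative assumption, not the book's example): instead of hand-picking inputs, you state properties and let Hypothesis generate lists of integers, including edge cases you would never think of.

```python
from hypothesis import given, strategies as st


@given(st.lists(st.integers()))
def test_sorted_properties(xs):
    result = sorted(xs)
    # Property 1: sorting is idempotent.
    assert sorted(result) == result
    # Property 2: sorting preserves length.
    assert len(result) == len(xs)
```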
Hypothesis gives you a minilanguage for describing the data it should generate.

**Keep it simple and minimize attack surfaces.** Bear in mind these security principles:

1. Minimize Attack Surface Area
   1. Code complexity makes the attack surface larger, with more opportunities for unanticipated side effects. Think of complex code as making the surface area more porous and open to infection. Simple, smaller code is better.
   2. Never trust data from an external entity; always sanitize it before passing it on to a database, view rendering, or other processing.
   3. Unauthenticated services are an attack vector. Any user anywhere in the world can call unauthenticated services.
   4. Keep the number of authenticated users at an absolute minimum. Cull unused, old, or outdated users and services. If an account with development services is compromised, your entire product is compromised.
   5. Don't give too much information about an error in the response.
2. Principle of Least Privilege - every program and every privileged user of the system should operate using the least amount of privilege necessary to complete the job.
3. Don't leave personally identifiable information, financial data, passwords, or other credentials in plain text. Don't check in secrets, API keys, SSH keys, encryption passwords or other credentials alongside your code in version control.
4. Apply security patches quickly. The largest data breaches in history were caused by systems that were behind on their updates.

You don't want to do encryption yourself. Even the tiniest error can compromise everything. Rely on reliable things: take the more pragmatic approach, let someone else worry about it, and use a third-party authentication provider.

**Name well, rename when needed.** Things should be named according to the role they play in your code. Honor the local culture (snake_case vs CamelCase vs ...). Every project has its own vocabulary - jargon words that have a special meaning to the team. It is important that everyone on the team knows what these words mean. One way is to encourage a lot of communication; another way is to have a project glossary. When you see a name that no longer expresses the intent, or is misleading or confusing, fix it.

## Chapter 8: Before the Project

**No one knows exactly what they want.** Requirements rarely lie on the surface. Normally, they are buried deep beneath layers of assumptions, misconceptions, and politics.

**Programmers help people understand what they want.** Our job is to help people understand what they want.

**Requirements are learned in a feedback loop.** Your role is to interpret what the client says and to feed back to them the implications. This is both an intellectual process and a creative one. Your job is to help the client understand the consequences of their stated requirements.

**Work with the user to think like a user.** There is a simple technique for getting inside your clients' heads: become a client.

**Policy is metadata.** Don't hardcode policy into a system; instead, express it as metadata used by the system.

**Use a project glossary.** Create and maintain a project glossary - one place that defines all the specific terms and vocabulary used in a project. It is hard to succeed on a project if users and developers call the same thing by different names.

**Don't think outside the box - find the box.** When faced with an impossible problem, identify the real constraints. Ask yourself: Does it have to be done this way? Does it have to be done at all?
Sometimes you find yourself working on a problem that seems much harder than you thought it should be. You may think this particular problem is "impossible". This is an ideal time to do something else for a while. Sleep on it, go walk the dog. People who were distracted did better on a complex problem-solving task than people who put in conscious effort. If you are not willing to drop the problem for a while, the next best thing is probably finding someone to explain it to (rubber duck).

Conway's Law: "_Organizations which design systems are constrained to produce designs which are copies of the communication structures of these organizations._"

**Don't go into code alone.** Pair programming - the inherent peer pressure of a second person helps against moments of weakness and bad habits such as naming variables foo. You are less inclined to take a potentially embarrassing shortcut when someone is actively watching, which also results in higher-quality code. Mob programming is an extension of pair programming that involves more than just two developers. You can think of mob programming as tight collaboration with live coding.

**Agile is not a noun, agile is how you do things.** Agile is an adjective. Remember the values from the manifesto:

1. Individuals and interactions over processes and tools
2. Working software over comprehensive documentation
3. Customer collaboration over contract negotiation
4. Responding to change over following a plan

Agility is all about responding to change, responding to the unknowns you encounter after you set out. A recipe for working in an agile way:

1. Work out where you are.
2. Make the smallest meaningful step towards where you want to be.
3. Evaluate where you end up, and fix anything you broke (this requires a good design, because it is easier to fix a good design).

## Chapter 9: Pragmatic Projects

**Maintain small, stable teams.** A pragmatic team is small, under 10-12 or so members. Members come and go rarely. Everyone knows everyone well, trusts each other, and depends on each other.

Quality is a team issue. The most diligent developer placed on a team that just doesn't care will find it difficult to maintain the enthusiasm needed to fix niggling problems. Teams as a whole should not tolerate broken windows - those small imperfections that no one fixes.

**Schedule to make it happen.** If your team is serious about improvement and innovation, you need to schedule it. Trying to get things done "whenever there is a free moment" means they will never happen. Whatever sort of backlog or task list or flow you are working with, don't reserve it for feature development only. The team works on more than just new features:

- old systems maintenance
- process reflection and refinement - continuous improvement can only happen when you take the time to look around
- new tech experiments - try new stuff and analyze the results
- learning and skill improvements - brown bags, training sessions

**Organize fully functional teams.** There is a simple marketing trick that helps teams communicate as one - generate a brand. When you start a project, come up with a name for it, ideally something off-the-wall. Spend 30 minutes coming up with a zany logo, and use it - it gives your team an identity to build on, and the world something memorable to associate with your work.

Good communication is key to avoiding problems. You should be able to ask a question of team members and get a more-or-less instant reply.
If you have to wait a week for the team meeting to ask your question or share your status, that is an awful lot of friction.

**Do what works, not what is fashionable.** Ask yourself, why are you even using that particular development method/framework/whatever? Does it work well for you? Or was it adopted just because it was being used in the latest internet-fueled success story? You want to take the best pieces from any particular methodology and adapt them for use. No single method fits all, and current methods are far from complete, so you will need to look at more than just one popular method. That is a very different mindset from "but Scrum/Lean/Kanban/XP/agile does it this way...".

The goal isn't to do Scrum/do agile/do Lean or what-have-you. The goal is to be in a position to deliver working software that gives the users some new capability at a moment's notice. Not weeks, months, or years from now. If you are delivering in years, shorten the cycle to months. From months, cut it down to weeks. From a four-week sprint, try two. From a two-week sprint, try one. Then daily. Then, finally, on demand. Note that being able to deliver on demand does not mean you are forced to deliver every minute of every day. You deliver when the users need it, when it makes business sense to do so.

**Deliver when users need it.** In order to move to this style of continuous development, you need a rock-solid infrastructure. Once your infrastructure is in order, you need to decide how to organize the work. Beginners might want to start with Scrum for project management. More disciplined and experienced teams might look to Kanban and Lean techniques. But investigate first - try these approaches for yourself.

**Use version control to drive builds, tests and releases.** Build, test, and deployment are triggered via commits or pushes to version control, and built in a container in the cloud. Release to staging or production is specified by using a tag in your version control system.

**Test early, test often, test automatically.** A good project may well have more test code than production code. The time it takes to produce this test code is worth the effort. It ends up being much cheaper in the long run, and you actually stand a chance of producing a product with close to zero defects.

**Coding ain't done till all the tests run.** The automatic build runs all available tests. It is important to aim to "test for real" - the test environment should match the production environment closely. The build may cover several major types of software testing: unit testing, integration testing, validation and verification, and performance testing.

**Use Saboteurs to test your testing.** Because we can't write perfect software, we can't write perfect tests either. We need to test the tests. After you have written a test to detect a bug, cause the bug deliberately and make sure the test complains. If you are really serious about testing, take a separate branch, introduce bugs on purpose, and verify that the tests will catch them. At a higher level, you can use something like Netflix's Chaos Monkey.

**Test state coverage, not code coverage.** Even if you happen to hit every line of code, that is not the whole picture. What is important is the number of states that your program may have. States are not equivalent to lines of code. A great way to explore how your code handles unexpected states is to have a computer generate those states (property-based testing).
**Find bugs once.** Once a human tester finds a bug, it should be the last time a human tester finds that bug. If a bug slips through the net of existing tests, you need to add a new test to trap it next time.

**Don't use manual procedures.** Tracking down differences of any one component usually reveals a surprise. People aren't as repeatable as computers are. Nor should we expect them to be. Everything should depend on automation: the project build, deployment, ... Once you introduce manual steps, you have broken a very large window.

**Delight users, don't just deliver code.** If you want to delight your client, forge a relationship with them where you can actively help solve their problems. Be a _Problem Solver_ (not a Software Engineer/Developer). That is the essence of a Pragmatic Programmer.

**Sign your work.** If we are responsible for a design, or a piece of code, we do a job we can be proud of. Artisans of an earlier age were proud to sign their work. You should be, too. However, while you shouldn't jealously defend your code against interlopers, by the same token you should treat other people's code with respect. Mutual respect among the developers is critical to make this tip work. We want to see pride in ownership: "_I wrote this, and I stand behind my work_". Your signature should come to be recognized as an indicator of quality. People should see your name on a piece of code and expect it to be solid, well written, tested and documented. A really professional job. Written by a professional. A Pragmatic Programmer.

## Postface

We have a duty to ask ourselves two questions about every piece of code we deliver:

1. Have I protected the user?
2. Would I use this myself?

**First, do no harm.** Would I be happy to be a user of this software? Do I want my details shared? Do I want my movements to be given to retail outlets? Would I be happy to be driven by this autonomous vehicle? Am I comfortable doing this? If you are involved in the project, you are just as responsible as the sponsors.

**Don't enable scumbags.**

**It is your life. Share it. Celebrate it. Build it. AND HAVE FUN.** You are building the future. Your duty is to make a future that we would all want to inhabit. Recognize when you are doing something against this ideal, and have the courage to say no.

================================================
FILE: books/pytest/.coveragerc
================================================
[paths]
source = src/

================================================
FILE: books/pytest/Dockerfile
================================================
FROM python:3.10.2

WORKDIR /src
ENV PYTHONPATH "${PYTHONPATH}:/src"

COPY requirements.txt .
COPY setup.cfg .

RUN pip install -r requirements.txt

COPY src/ src/
COPY tests/ tests/

================================================
FILE: books/pytest/docker-compose.yml
================================================
version: "3.9"
services:
  book:
    build:
      context: .
      dockerfile: Dockerfile
    volumes:
      - ./:/src

================================================
FILE: books/pytest/notes.md
================================================
[go back](https://github.com/pkardas/learning)

# Python Testing with Pytest: Simple, Rapid, Effective, and Scalable

Book by Brian Okken

Code here: [click](.)
- [Chapter 1: Getting Started with pytest](#chapter-1-getting-started-with-pytest)
- [Chapter 2: Writing Test Functions](#chapter-2-writing-test-functions)
- [Chapter 3: pytest Fixtures](#chapter-3-pytest-fixtures)
- [Chapter 4: Built-in fixtures](#chapter-4-built-in-fixtures)
- [Chapter 5: Parametrization](#chapter-5-parametrization)
- [Chapter 6: Markers](#chapter-6-markers)
- [Chapter 7: Strategy](#chapter-7-strategy)
- [Chapter 8: Configuration Files](#chapter-8-configuration-files)
- [Chapter 9: Coverage](#chapter-9-coverage)
- [Chapter 10: Mocking](#chapter-10-mocking)
- [Chapter 11: tox and Continuous Integration](#chapter-11-tox-and-continuous-integration)
- [Chapter 12: Testing Scripts and Applications](#chapter-12-testing-scripts-and-applications)
- [Chapter 13: Debugging Test Failures](#chapter-13-debugging-test-failures)
- [Chapter 14: Third-Party Plugins](#chapter-14-third-party-plugins)
- [Chapter 15: Building Plugins](#chapter-15-building-plugins)
- [Chapter 16: Advanced Parametrization](#chapter-16-advanced-parametrization)

## Chapter 1: Getting Started with pytest

Part of pytest execution is test discovery, where pytest looks for `.py` files starting with `test_` or ending with `_test`. Test methods and functions must start with `test_`, test classes should start with `Test`.

Flag `--tb=no` turns off tracebacks.

Test outcomes:

- PASSED (.)
- FAILED (F)
- SKIPPED (S) - you can tell pytest to skip a test by using `@pytest.mark.skip` or `@pytest.mark.skipif`
- XFAIL (x) - the test was not supposed to pass (`@pytest.mark.xfail`)
- XPASS (X) - the test was marked with xfail, but it ran and passed
- ERROR (E) - an exception happened during the execution

## Chapter 2: Writing Test Functions

Writing knowledge-building tests - when faced with a new data structure, it is often helpful to write some quick tests so that you can understand how the data structure works. The point of these tests is to check my understanding of how the structure works, and possibly to document that knowledge for someone else or even for a future me.

`pytest` includes a feature called "_assert rewriting_" that intercepts _assert_ calls and replaces them with something that can tell you more about why your assertions failed.

`pytest.fail()` underneath raises an exception. When calling this function or raising an exception directly, we don't get the wonderful "assert rewriting" provided by pytest.

Assertion helper function - used to wrap up a complicated assertion check. `__tracebackhide__ = True` has the effect that failing tests will not include this function in the traceback.

Flag `--tb=short` - a shorter traceback format.

Use `pytest.raises` to test expected exceptions. You can check error details by using `match`; `match` accepts a regular expression and matches it against the exception message. You can also use `as exc_info` (or any other variable name) to interrogate the raised exception (see the sketch below).

Arrange-Act-Assert and Given-When-Then patterns are about separating a test into stages. A common anti-pattern is an "Arrange-Assert-Act-Assert-Act-Assert-..." test. A test should focus on testing one behavior.

`pytest` allows you to group tests with classes. You can utilize class hierarchies for inherited methods. However, the book's author doesn't recommend test inheritance because it easily confuses readers. Use classes only for grouping.
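A minimal sketch of `pytest.raises` with `match` and `exc_info` (the function and message are hypothetical):

```python
import pytest


def find_card(card_id):
    raise LookupError(f"card id {card_id} not found")


def test_expected_exception():
    # match takes a regular expression, matched against the message
    with pytest.raises(LookupError, match=r"card id \d+ not found") as exc_info:
        find_card(17)
    # exc_info lets us interrogate the raised exception further
    assert "17" in str(exc_info.value)
```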
`pytest` allows you to run a subset of tests, examples:

- `pytest ch2/test_classes.py::TestEquality::test_equality`
- `pytest ch2/test_classes.py::TestEquality`
- `pytest ch2/test_classes.py`
- `pytest ch2/test_card.py::test_defaults`
- `pytest ch2/test_card.py`

The `-k` argument takes an expression and tells pytest to run tests that contain a substring matching the expression, examples:

- `pytest -v -k TestEquality`
- `pytest -v -k TestEq`
- `pytest -v -k equality`
- `pytest -v -k "equality and not equality_fail"` (_and_, _or_, _not_ and parentheses are allowed to create complex expressions)

## Chapter 3: pytest Fixtures

Fixtures are helper functions, run by pytest before (and sometimes after) the actual test functions. Code in the fixture can do whatever you want it to do. "Fixture" is also used to refer to the resource that is being set up by the fixture function.

`pytest` treats exceptions differently during fixtures compared to during a test function:

- FAIL - the failure is somewhere in the test function
- ERROR - the failure is somewhere in the fixture

Fixtures help a lot when dealing with databases.

Fixture functions run before the tests that use them. If there is a `yield` in the function, it stops there, passes control to the tests, and picks up on the next line after the tests are done. The code above `yield` is "setup" and the code after `yield` is "teardown". The code after `yield` is guaranteed to run regardless of what happens during the tests (see the sketch below).

Flag `--setup-show` shows us the order of operations of tests and fixtures, including the setup and teardown phases of the fixtures.

The scope dictates how often the setup and teardown get run when the fixture is used by multiple test functions:

- _function_ - (default scope) run once per test function. The setup is run before each test using the fixture. The teardown is run after each test using the fixture.
- _class_ - run once per test class, regardless of how many test methods are in the class.
- _module_ - run once per module, regardless of how many test functions/methods or other fixtures in the module use it.
- _package_ - run once per package, regardless of how many test functions/methods or other fixtures in the package use it.
- _session_ - run once per session, all test methods/functions using a fixture of session scope share one setup and teardown call.

The scope is set at the definition of a fixture, not at the place where it is used: `@pytest.fixture(scope=...)`. Fixtures can only depend on other fixtures of their same scope or wider.

`conftest.py` is considered by `pytest` a "local plugin" and gets read by pytest automatically. Use `conftest.py` to share fixtures among multiple test files. We can have `conftest.py` files at every level of our test directory. A test can use any fixture that is in the same test module as the test function, or in a `conftest.py` file in the same directory (or in a parent directory).

Use `--fixtures` to show a list of all available fixtures our tests can use. Use `--fixtures-per-test` to see what fixtures are used by each test and where the fixtures are defined.

Using fixtures with multiple stages can provide some incredible speed benefits and maintain test order independence. It is possible to set fixture scope dynamically, e.g. by passing a new flag as an argument.

Use `autouse=True` to run a fixture for every test automatically. The `autouse` feature is good to have around, but it is more of an exception than a rule. Opt for named fixtures unless you have a really great reason not to.
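A minimal sketch of a `yield` fixture with setup and teardown (the resource is hypothetical):

```python
import pytest


@pytest.fixture()
def connection():
    conn = {"open": True}  # setup: runs before the test
    yield conn             # the test runs here
    conn["open"] = False   # teardown: guaranteed to run after the test


def test_connection_is_open(connection):
    assert connection["open"]
```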
`pytest` allows you to rename fixtures with a `name` parameter to `@pytest.fixture`.

## Chapter 4: Built-in fixtures

`tmp_path` and `tmp_path_factory` - used to create temporary directories.

- `tmp_path` - function scope
- `tmp_path_factory` - session scope - you have to call `mktemp` to get a directory
- `tmpdir_factory` - similar to `tmp_path_factory`, but instead of `Path`, returns `py.path.local`

`capsys` - enables the capturing of writes to `stdout` and `stderr`.

- `capfd` - like `capsys`, but captures file descriptors 1 and 2 (stdout and stderr)
- `capsysbinary` - `capsys` captures text, `capsysbinary` captures binary
- `caplog` - captures output written with the logging package

A "monkey patch" is a dynamic modification of a class or module during runtime. "Monkey patching" is a convenient way to take over part of the runtime environment of the application code and replace it with entities that are more convenient for testing.

`monkeypatch` - used to modify objects, dictionaries, env variables. When the test ends, the original unpatched code is restored. It has the following functions:

- `setattr` - sets an attribute
- `delattr` - deletes an attribute
- `setitem` - sets a dictionary entry
- `delitem` - deletes a dictionary entry
- `setenv` - sets an env variable
- `delenv` - deletes an env variable
- `syspath_prepend` - prepends `path` to `sys.path`, which is Python's list of import locations
- `chdir` - changes the current working directory

Once you start using monkey patching:

- you will start to understand it
- you will start to avoid mocking and monkey patching whenever possible

DESIGN FOR TESTABILITY - a concept borrowed from hardware designers: adding functionality to software to make it easier to test.

More fixtures: https://docs.pytest.org/en/6.2.x/fixture.html or run `pytest --fixtures`.

## Chapter 5: Parametrization

Parametrized tests refer to adding parameters to our test functions and passing in multiple sets of arguments to the test to create new test cases. With fixture parametrization, we shift the parameters to a fixture; `pytest` will then call the fixture once for every set of values we provide. Fixture parametrization has the benefit of having a fixture run for each set of arguments. This is useful if you have setup or teardown code that needs to run for each test case - e.g. a different database connection, different file content, ...

`pytest_generate_tests` - a hook function. Allows you to modify the parametrization list at test collection time in interesting ways.

## Chapter 6: Markers

Markers are a way to tell pytest there is something special about a particular test. You can think of them like tags or labels. If some tests are slow, you can mark them with `@pytest.mark.slow` and have pytest skip those tests when you are in a hurry. You can pick a handful of tests out of a test suite and mark them with `@pytest.mark.smoke`.

Built-in markers:

- `@pytest.mark.filterwarnings(warning)` - adds a warning filter to the given test
- `@pytest.mark.skip(reason=None)` - skip the test with an optional reason
- `@pytest.mark.skipif(condition, ..., *, reason)` - skip the test if any of the conditions are true
- `@pytest.mark.xfail(condition, ..., *, reason, run=True, raises=None, strict=xfail_strict)` - we expect the test to fail. If we want to run all tests, even those that we know will fail, we can use this marker.
- `@pytest.mark.parametrize(argnames, argvalues, indirect, ids, scope)` - call a test function multiple times
- `@pytest.mark.usefixtures(fixturename1, fixturename2, ...)` - marks tests as needing all the specified fixtures

Custom markers - you need to add `pytest.ini` with the marker definition, some ideas for markers:

- `@pytest.mark.smoke` - run `pytest -v -m smoke` to run smoke tests only
- `@pytest.mark.exception` - run `pytest -v -m exception` to run exception-related tests only

Custom markers shine when we have more files involved. We can also add markers to entire files or classes. We can even put multiple markers on a single test. File-level marker:

```python
pytestmark = [pytest.mark.marker_one, pytest.mark.marker_two]
```

When filtering tests using markers, it is possible to combine markers and use a bit of logic, just like we did with the `-k` option, e.g. `pytest -v -m "custom and exception"`, `pytest -v -m "finish and not smoke"`.

`--strict-markers` - raises an error when a marker is not found (by default a warning is raised). The error is raised at collection time, not at run time, so it is reported earlier.

Markers can be used in conjunction with fixtures. Use `--markers` to list all available markers.

## Chapter 7: Strategy

_Testing enough to sleep at night_: the idea of testing enough so that you can sleep at night may have come from software systems where developers have to be on call to fix software if it stops working in the middle of the night. It has been extended to include sleeping soundly, knowing that your software is well tested.

Testing through the API tests most of the system and logic.

Before you create the test cases you want to test, evaluate what features to test. When you have a lot of functionality and features to test, you have to prioritize the order of developing tests. At least a rough idea of order helps. Prioritize using the following factors:

1. Recent - new features, new areas of code, recently modified, refactored.
2. Core - your product's unique selling propositions. The essential functions that must continue to work in order for the product to be useful.
3. Risk - areas of the application that pose more risk, such as areas important to customers but not used regularly by the development team, or parts that use third-party code you don't trust.
4. Problematic - functionality that frequently breaks or often gets defect reports against it.
5. Expertise - features or algorithms understood by a limited subset of people.

Creating test cases:

- start with a non-trivial, "happy path" test case
- then look at test cases that represent:
  - interesting sets of inputs
  - interesting starting states
  - interesting end states
  - all possible error states

## Chapter 8: Configuration Files

Non-test files that affect how _pytest_ runs:

- `pytest.ini` - the primary pytest configuration file that allows you to change pytest's default behavior. Its location also defines the pytest root directory.
- `conftest.py` - this file contains fixtures and hook functions. It can exist at the root directory or in any subdirectory. It is a good idea to stick to only one `conftest.py` file, so you can find fixture definitions easily.
- `__init__.py` - when put into test subdirectories, this file allows you to have identical test file names in multiple test directories. This means you can have `api/test_add.py` and `cli/test_add.py`, but only if you have `__init__.py` in both directories.
- `tox.ini`, `pyproject.toml`, `setup.cfg` - these files can take the place of `pytest.ini`

Example `pytest.ini`:

```
[pytest]  -- including `[pytest]` allows the pytest ini parsing to treat `pytest.ini` and `tox.ini` identically
addopts =  -- enables us to list the pytest flags we always want to run in this project
    --strict-markers  -- raise an error for any unregistered marker
    --strict-config  -- raise an error for any difficulty in parsing config files
    -ra  -- display extra test summary at the end of a test run
testpaths = tests  -- tells pytest where to look for tests
markers =  -- declare markers
    smoke: subset of tests
    exception: check for expected exceptions
```

Example `tox.ini`:

```
[tox]
; tox specific settings

[pytest]
addopts =
    --strict-markers
    --strict-config
    -ra
...
```

Example `pyproject.toml`:

```
[tool.pytest.ini_options]
addopts = [
    "--strict-markers",
    "--strict-config",
    "-ra",
]
testpaths = "tests"
markers = [
    "smoke: subset of tests",
    "exception: check for expected exceptions",
]
```

Example `setup.cfg`:

```
[tool:pytest]
addopts =
    --strict-markers
    --strict-config
    -ra
...
```

Even if you don't need any configuration settings, it is still a great idea to place an empty `pytest.ini` at the top of your project, because it pins down the pytest root directory and stops pytest from searching for configuration elsewhere.

## Chapter 9: Coverage

Tools that measure code coverage watch your code while a test suite is being run and keep track of which lines are hit and which are not. That measurement is called "line coverage": the number of lines run during testing divided by the total number of lines of code. Code coverage tools can also tell you if all paths are taken in control statements - "branch coverage".

Code coverage cannot tell you if your test suite is good - it can only tell you how much of the application code is getting hit by your test suite.

`coverage.py` is the preferred Python coverage tool; `pytest-cov` is a popular pytest plugin (it depends on `coverage.py`, so that will be installed as well). To run tests with `coverage.py`, you need to add the `--cov` flag. To add missing lines to the terminal report, add the `--cov-report=term-missing` flag. `coverage.py` is able to generate HTML reports, to help view coverage data in more detail: `docker-compose run --rm book pytest --cov=src --cov-report=html`.

`# pragma: no cover` - tells `coverage` to exclude either a single line or a block of code.

**Beware of Coverage-Driven Development!** The problem with adding tests just to hit 100% is that doing so will mask the fact that these lines aren't being used and therefore are not needed by the application. It also adds test code and coding time that is not necessary.

## Chapter 10: Mocking

The `mock` package is used to swap out pieces of the system to isolate bits of our application code from the rest of the system. Mock objects are sometimes called _test doubles_, _spies_, _fakes_ or _stubs_.

Typer provides a testing interface. With it, we don't have to use `subprocess.run`, which is good, because we can't mock stuff running in a separate process.

Mocks by default accept any access. If the real object allows `.start(index)`, we want our mock objects to allow `start(index)` as well. Mock objects are too flexible by default - they will also accept `star()` (a misspelled method), additional parameters, really anything.

_Mock drift_ - occurs when the interface you are mocking changes, and the mock in your test code doesn't.

Use `autospec=True` - without it, a mock will allow you to call any function, with any parameters, even if it doesn't make sense for the real thing being mocked.
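A minimal sketch of the same signature checking, done standalone via `unittest.mock.create_autospec` (the class here is hypothetical):

```python
from unittest import mock


class CardsDB:
    def start(self, card_id: int) -> None:
        ...


db = mock.create_autospec(CardsDB, instance=True)
db.start(7)  # OK - matches the real signature

try:
    db.start(7, "extra")  # rejected - too many arguments
except TypeError:
    pass

try:
    db.star(7)  # rejected - misspelled method does not exist on the spec
except AttributeError:
    pass
```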
Always use _autospec_ when you can.

**Mocking tests implementation, not behavior.** When we are using mocks in a test, we are no longer testing behavior, but testing implementation. Focusing tests on implementation is dangerous and time-consuming.

_Change detector tests_ - tests that break during valid refactoring. When tests fail whenever the code changes, they are change detector tests, and they are usually more trouble than they are worth.

Mocking is useful when you need to generate an exception or make sure your code calls a particular API method when it is supposed to, with the correct parameters.

There are several special-purpose mocking libraries:

- mocking databases: `pytest-postgresql`, `pytest-mongo`, `pytest-mysql`, `pytest-dynamodb`
- mocking HTTP servers: `pytest-httpserver`
- mocking requests: `responses`, `betamax`
- other: `pytest-rabbitmq`, `pytest-solr`, `pytest-elasticsearch`, `pytest-redis`

Adding functionality that makes testing easier is part of "design for testability" and can be used to allow testing at multiple levels or testing at a higher level.

## Chapter 11: tox and Continuous Integration

CI refers to the practice of merging all developers' code changes into a shared repository on a regular basis - often several times a day. Before the implementation of CI, teams used version control to keep track of code updates, and different developers would add a feature/fix on separate branches. Then the code was merged, built, and tested. The frequency of merges varied from "when your code is ready, merge it" to regularly scheduled merges (weekly, monthly). The merge was called _integration_ because the code was being integrated together. With this sort of version control, code conflicts happened often, and some merge errors were not found until very late.

CI tools build and run tests all on their own, usually triggered by a merge request. Because the build and test stages are automated, developers can integrate more frequently, even several times a day.

`tox` - a command-line tool that allows you to run a complete suite of tests in multiple environments. A great starting point when learning about CI. `tox`:

1. creates a virtual env in a `.tox` directory
2. pip installs some dependencies
3. builds your package
4. pip installs your package
5. runs your tests

`tox` can automate the testing process locally, but it also helps with cloud-based CI. You can integrate tox with GitHub Actions.

## Chapter 12: Testing Scripts and Applications

Definitions:

- script - a single file containing Python code that is intended to be run directly from Python
- importable script - a script in which no code is executed when it is imported. Code is executed only when it is run directly
- application - a package or script that has external dependencies

Testing a small script with `subprocess.run` works okay, but it does have drawbacks:

- we may want to test sections of larger scripts separately
- we may want to separate test code and scripts into different directories

The solution is to make the script importable: guard the entry point with `if __name__ == "__main__":` - this code is executed only when we call the script with `python script.py`.
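A minimal sketch of an importable script (mirroring the `ch_12/hello.py` example later in this repo):

```python
def main():
    print("Hello world")


if __name__ == "__main__":
    # Executed only via `python hello.py`, not on import,
    # so tests can import this module and call main() directly.
    main()
```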
## Chapter 13: Debugging Test Failures

pytest includes a few command-line flags that are useful for debugging:

- `--lf` / `--last-failed` - runs just the tests that failed last
- `--ff` / `--failed-first` - runs all the tests, starting with the ones that failed last
- `-x` / `--exitfirst` - stops the test session after the first failure
- `--maxfail=num` - stops the tests after `num` failures
- `--nf` / `--new-first` - runs all the tests, ordered by file modification time
- `--sw` / `--stepwise` - stops the tests at the first failure, next time starts from the last failing test
- `--sw-skip` / `--stepwise-skip` - same as `--sw`, but skips the first failure

Flags to control pytest output:

- `-v` / `--verbose` - shows all the test names, passing or failing
- `--tb=[auto/long/short/line/native/no]` - controls the traceback style
- `-l` / `--showlocals` - displays local variables alongside the stacktrace

Flags to start a command-line debugger:

- `--pdb` - starts an interactive debugging session at the point of failure
- `--trace` - starts the pdb source-code debugger immediately when running each test
- `--pdbcls` - uses alternatives to pdb

`pdb` - the Python Debugger - is part of the Python standard library. Add a `breakpoint()` call; when pytest hits this call, it will stop there and launch `pdb`. There are common commands recognized by `pdb` - the full list is in the documentation (or use PyCharm's debugger instead if you can).

## Chapter 14: Third-Party Plugins

The pytest code is designed to allow customisation and extensions, and there are hooks available to allow modifications and improvements through plugins.

Every time you put fixtures and/or hook functions into a project's `conftest.py` file, you create a local plugin - see the sketch below. Only some extra work is needed to turn these files into installable plugins. `pytest` plugins are installed with `pip`.
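For instance, a minimal local plugin is just a shared fixture in `conftest.py` (the fixture here is hypothetical):

```python
# conftest.py - a "local plugin", read by pytest automatically
import pytest


@pytest.fixture()
def empty_cards_db(tmp_path):
    # available to every test file in this directory, no import needed
    return {"path": tmp_path, "cards": []}
```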
Plugins that change the normal test run flow:

- `pytest-order` - specify the order using markers
- `pytest-randomly` - randomize the order, first by file, then by class, then by test
- `pytest-repeat` - makes it easy to repeat a single test, or multiple tests, a specific number of times
- `pytest-rerunfailures` - reruns failed tests (helpful for flaky tests)
- `pytest-xdist` - runs tests in parallel, either using multiple CPUs or multiple remote machines

Plugins that alter or enhance output:

- `pytest-instafail` - reports tracebacks and output from failed tests right after the failure
- `pytest-sugar` - shows green checkmarks instead of dots and has a nice progress bar
- `pytest-html` - allows for HTML report generation

Plugins for web development:

- `pytest-selenium` - additional fixtures to allow easy configuration of browser-based tests
- `pytest-splinter` - built on top of Selenium, allows Splinter to be used more easily from pytest
- `pytest-django`, `pytest-flask` - make testing Django/Flask apps easier

Plugins for fake data:

- `Faker` - generates fake data, provides the `faker` fixture
- `model-bakery` - generates Django models with fake data
- `pytest-factoryboy` - includes fixtures for Factory Boy
- `pytest-mimesis` - generates fake data similarly to Faker, but Mimesis is quite a bit faster

Plugins that extend pytest functionality:

- `pytest-cov` - runs coverage while testing
- `pytest-benchmark` - runs benchmark timing on code within tests
- `pytest-timeout` - doesn't let tests run too long
- `pytest-asyncio` - tests async functions
- `pytest-bdd` - BDD-style tests with pytest
- `pytest-freezegun` - freezes time so that any code that reads the time will get the same value during a test; you can also set a particular date or time
- `pytest-mock` - a thin wrapper around `unittest.mock`

Full list of plugins: https://docs.pytest.org/en/latest/reference/plugin_list.html

## Chapter 15: Building Plugins

Hook functions - function entry points that pytest provides to allow plugin developers to intercept pytest behaviour at certain points and make changes. There are multiple hook functions, for example:

- `pytest_configure()` - performs initial configuration. We can use it, for example, to pre-declare the `slow` marker.
- `pytest_addoption()` - registers options and settings, e.g. a new flag: _--slow_
- `pytest_collection_modifyitems()` - called after test collection, can be used to filter or re-order the test items, e.g. to find _slow_ tests

The Node Interface: https://docs.pytest.org/en/latest/reference/reference.html#node

You can transform a local `conftest.py` into an installable plugin. You can use `Flit` to get help with the `pyproject.toml` and `LICENSE`.

Plugins are code that needs to be tested just like any other code. `pytester` is a plugin shipped with `pytest`. `pytester` creates a temporary directory for each test that uses the `pytester` fixture; there are a bunch of functions to help populate this directory - https://docs.pytest.org/en/latest/reference/reference.html#pytester

## Chapter 16: Advanced Parametrization

When using complex parametrization values, `pytest` numbers the test cases like: `starting_card0, starting_card1, ...`.
It is possible to generate custom identifiers:

```py
card_list = [
    Card("foo", "todo"),
    Card("foo", "in prog"),
    Card("foo", "done"),
]

@pytest.mark.parametrize("starting_card", card_list, ids=str)
```

You can write a custom ID function:

```py
def cards_state(card):
    return card.state

@pytest.mark.parametrize("starting_card", card_list, ids=cards_state)
```

A lambda function works as well:

```py
@pytest.mark.parametrize("starting_card", card_list, ids=lambda c: c.state)
```

If you have one or two parameters requiring special treatment, use `pytest.param` to override the ID:

```py
card_list = [
    Card("foo", "todo"),
    pytest.param(Card("foo", "in prog"), id="special"),
    Card("foo", "done"),
]

@pytest.mark.parametrize("starting_card", card_list, ids=cards_state)
```

You can supply a list to `ids`, instead of a function:

```py
id_list = ["todo", "in prog", "done"]

@pytest.mark.parametrize("starting_card", card_list, ids=id_list)
```

but you have to be extra careful to keep the lists synchronized. Otherwise, the IDs are wrong.

It is possible to write our own function to generate parameter values:

```py
def text_variants():
    # This function can read data from a file/API/database/... as well.
    variants = {...: ...}
    for key, value in variants.items():
        yield pytest.param(value, id=key)

@pytest.mark.parametrize("variant", text_variants())
```

If you want to test all combinations, stacking parameters is the way to go:

```py
@pytest.mark.parametrize("state", states)
@pytest.mark.parametrize("owner", owners)
@pytest.mark.parametrize("summary", summaries)
def test_stacking(summary, owner, state):
    ...
```

this will act rather like cascading for loops, looping on the parameters from the bottom decorator to the top.

An _indirect parameter_ is one that gets passed to a fixture before it gets sent to the test function. Indirect parameters essentially let us parametrize a fixture, while keeping the parameter values with the test function. This allows different tests to use the same fixture with different parameter values.

```py
@pytest.fixture()
def user(request):
    role = request.param
    print(f"Logging in as {role}")
    yield role
    print(f"Logging out {role}")

@pytest.mark.parametrize("user", ["admin", "team_member", "visitor"], indirect=["user"])
def test_access_rights(user):
    ...
```

================================================
FILE: books/pytest/requirements.txt
================================================
tinydb
pytest
faker
tox
coverage
pytest-cov
typer
rich

================================================
FILE: books/pytest/setup.cfg
================================================
[tool:pytest]
python_paths = .
testpaths = tests

================================================
FILE: books/pytest/src/__init__.py
================================================
"""Top-level package for cards."""

__version__ = "1.0.0"

from .api import *  # noqa
from .cli import app  # noqa

================================================
FILE: books/pytest/src/api.py
================================================
"""
API for the cards project
"""
from dataclasses import asdict
from dataclasses import dataclass
from dataclasses import field

from src.db import DB

__all__ = [
    "Card",
    "CardsDB",
    "CardsException",
    "MissingSummary",
    "InvalidCardId",
]

__version__ = "1.0.0"


@dataclass
class Card:
    summary: str = None
    owner: str = None
    state: str = "todo"
    id: int = field(default=None, compare=False)

    @classmethod
    def from_dict(cls, d):
        return Card(**d)

    def to_dict(self):
        return asdict(self)


class CardsException(Exception):
    pass


class MissingSummary(CardsException):
    pass


class InvalidCardId(CardsException):
    pass


class CardsDB:
    def __init__(self, db_path):
        self._db_path = db_path
        self._db = DB(db_path, ".cards_db")

    def add_card(self, card: Card) -> int:
        """Add a card, return the id of card."""
        if not card.summary:
            raise MissingSummary
        if card.owner is None:
            card.owner = ""
        id = self._db.create(card.to_dict())
        self._db.update(id, {"id": id})
        return id

    def get_card(self, card_id: int) -> Card:
        """Return a card with a matching id."""
        db_item = self._db.read(card_id)
        if db_item is not None:
            return Card.from_dict(db_item)
        else:
            raise InvalidCardId(card_id)

    def list_cards(self, owner=None, state=None):
        """Return a list of cards."""
        all = self._db.read_all()
        if (owner is not None) and (state is not None):
            return [
                Card.from_dict(t)
                for t in all
                if (t["owner"] == owner and t["state"] == state)
            ]
        elif owner is not None:
            return [Card.from_dict(t) for t in all if t["owner"] == owner]
        elif state is not None:
            return [Card.from_dict(t) for t in all if t["state"] == state]
        else:
            return [Card.from_dict(t) for t in all]

    def count(self) -> int:
        """Return the number of cards in db."""
        return self._db.count()

    def update_card(self, card_id: int, card_mods: Card) -> None:
        """Update a card with modifications."""
        try:
            self._db.update(card_id, card_mods.to_dict())
        except KeyError as exc:
            raise InvalidCardId(card_id) from exc

    def start(self, card_id: int):
        """Set a card state to 'in prog'."""
        self.update_card(card_id, Card(state="in prog"))

    def finish(self, card_id: int):
        """Set a card state to 'done'."""
        self.update_card(card_id, Card(state="done"))

    def delete_card(self, card_id: int) -> None:
        """Remove a card from db with given card_id."""
        try:
            self._db.delete(card_id)
        except KeyError as exc:
            raise InvalidCardId(card_id) from exc

    def delete_all(self) -> None:
        """Remove all cards from db."""
        self._db.delete_all()

    def close(self):
        self._db.close()

    def path(self):
        return self._db_path

================================================
FILE: books/pytest/src/cli.py
================================================
"""Command Line Interface (CLI) for cards project."""
import os
import pathlib
from contextlib import contextmanager
from io import StringIO
from typing import List

import rich
import typer
from rich.table import Table

import src.api as cards

app = typer.Typer(name="cards", add_completion=False)


@app.command()
def version():
    """Return version of cards application"""
    print(cards.__version__)


@app.command()
def add(
    summary: List[str],
    owner: str = typer.Option(None, "-o", "--owner"),
):
    """Add a card to db."""
    summary = " ".join(summary) if summary else None
    with cards_db() as db:
        db.add_card(cards.Card(summary, owner, state="todo"))


@app.command()
def delete(card_id: int):
    """Remove card in db with given id."""
    with cards_db() as db:
        try:
            db.delete_card(card_id)
        except cards.InvalidCardId:
            print(f"Error: Invalid card id {card_id}")


@app.command("list")
def list_cards(
    owner: str = typer.Option(None, "-o", "--owner"),
    state: str = typer.Option(None, "-s", "--state"),
):
    """
    List cards in db.
    """
    with cards_db() as db:
        the_cards = db.list_cards(owner=owner, state=state)
        table = Table(box=rich.box.SIMPLE)
        table.add_column("ID")
        table.add_column("state")
        table.add_column("owner")
        table.add_column("summary")
        for t in the_cards:
            owner = "" if t.owner is None else t.owner
            table.add_row(str(t.id), t.state, owner, t.summary)
        out = StringIO()
        rich.print(table, file=out)
        print(out.getvalue())


@app.command()
def update(
    card_id: int,
    owner: str = typer.Option(None, "-o", "--owner"),
    summary: List[str] = typer.Option(None, "-s", "--summary"),
):
    """Modify a card in db with given id with new info."""
    summary = " ".join(summary) if summary else None
    with cards_db() as db:
        try:
            db.update_card(card_id, cards.Card(summary, owner, state=None))
        except cards.InvalidCardId:
            print(f"Error: Invalid card id {card_id}")


@app.command()
def start(card_id: int):
    """Set a card state to 'in prog'."""
    with cards_db() as db:
        try:
            db.start(card_id)
        except cards.InvalidCardId:
            print(f"Error: Invalid card id {card_id}")


@app.command()
def finish(card_id: int):
    """Set a card state to 'done'."""
    with cards_db() as db:
        try:
            db.finish(card_id)
        except cards.InvalidCardId:
            print(f"Error: Invalid card id {card_id}")


@app.command()
def config():
    """List the path to the Cards db."""
    with cards_db() as db:
        print(db.path())


@app.command()
def count():
    """Return number of cards in db."""
    with cards_db() as db:
        print(db.count())


@app.callback(invoke_without_command=True)
def main(ctx: typer.Context):
    """
    Cards is a small command line task tracking application.
""" if ctx.invoked_subcommand is None: list_cards(owner=None, state=None) def get_path(): db_path_env = os.getenv("CARDS_DB_DIR", "") if db_path_env: db_path = pathlib.Path(db_path_env) else: db_path = pathlib.Path.home() / "cards_db" return db_path @contextmanager def cards_db(): db_path = get_path() db = cards.CardsDB(db_path) yield db db.close() ================================================ FILE: books/pytest/src/db.py ================================================ """ DB for the cards project """ import tinydb class DB: def __init__(self, db_path, db_file_prefix): self._db = tinydb.TinyDB( db_path / f"{db_file_prefix}.json", create_dirs=True ) def create(self, item: dict) -> int: id = self._db.insert(item) return id def read(self, id: int): item = self._db.get(doc_id=id) return item def read_all(self): return self._db def update(self, id: int, mods) -> None: changes = {k: v for k, v in mods.items() if v is not None} self._db.update(changes, doc_ids=[id]) def delete(self, id: int) -> None: self._db.remove(doc_ids=[id]) def delete_all(self) -> None: self._db.truncate() def count(self) -> int: return len(self._db) def close(self): self._db.close() ================================================ FILE: books/pytest/tests/ch_02/test_card.py ================================================ import pytest from src import Card def test_field_access(): c = Card("something", "brian", "todo", 123) assert (c.summary, c.owner, c.state, c.id) == ("something", "brian", "todo", 123) def test_defaults(): c = Card() assert (c.summary, c.owner, c.state, c.id) == (None, None, "todo", None) def test_equality(): assert Card("something", "brian", "todo", 123) == Card("something", "brian", "todo", 123) def test_equality_with_different_ids(): assert Card("something", "brian", "todo", 123) == Card("something", "brian", "todo", 321) def test_inequality(): assert Card("something", "brian", "todo", 123) != Card("completely different", "okken", "todo", 123) def test_to_dict(): assert Card.from_dict({ "summary": "something", "owner": "brian", "state": "todo", "id": 123 }) == Card("something", "brian", "todo", 123) def test_from_dict(): assert Card("something", "brian", "todo", 123).to_dict() == { "summary": "something", "owner": "brian", "state": "todo", "id": 123 } ================================================ FILE: books/pytest/tests/ch_02/test_classes.py ================================================ from src import Card class TestEquality: def test_equality(self): assert Card("something", "brian", "todo", 123) == Card("something", "brian", "todo", 123) def test_equality_with_different_ids(self): assert Card("something", "brian", "todo", 123) == Card("something", "brian", "todo", 321) def test_inequality(self): assert Card("something", "brian", "todo", 123) != Card("completely different", "okken", "todo", 123) ================================================ FILE: books/pytest/tests/ch_02/test_exceptions.py ================================================ import pytest from src import CardsDB def test_no_path_raises(): with pytest.raises(TypeError): CardsDB() def test_raises_with_info(): with pytest.raises(TypeError, match="missing 1 .* positional argument"): CardsDB() ================================================ FILE: books/pytest/tests/ch_02/test_helper.py ================================================ import pytest from src import Card def assert_identical(c1: Card, c2: Card): # Do not include 'assert_identical' in traceback: __tracebackhide__ = True assert c1 == c2 if c1.id != c2.id: 
pytest.fail(f"id's don't match. {c1.id} != {c2.id}") def test_identical(): assert_identical(Card("foo", id=123), Card("foo", id=123)) @pytest.mark.skip() def test_identical_fail(): assert_identical(Card("foo", id=123), Card("foo", id=321)) ================================================ FILE: books/pytest/tests/ch_03/conftest.py ================================================ from pathlib import Path from tempfile import TemporaryDirectory import pytest from src import ( Card, CardsDB, ) @pytest.fixture(scope="session") def db(): with TemporaryDirectory() as db_dir: db_path = Path(db_dir) _db = CardsDB(db_path) yield _db _db.close() @pytest.fixture(scope="function") def cards_db(db): db.delete_all() return db @pytest.fixture(scope="session") def some_cards(): return [ Card("write book", "brian", "done"), Card("edit book", "katie", "done"), Card("write 2nd edition", "brian", "todo"), Card("edit 2nd edition", "katie", "todo"), ] ================================================ FILE: books/pytest/tests/ch_03/test_autouse.py ================================================ from time import ( localtime, sleep, strftime, time, ) import pytest @pytest.fixture(scope="function") def non_empty_db(cards_db, some_cards): for c in some_cards: cards_db.add_card(c) return cards_db @pytest.fixture(autouse=True, scope="session") def footer_session_scope(): yield now = time() print("---") print(f"finished : {strftime('%d %b %X', localtime(now))}") print("--------") @pytest.fixture(autouse=True) def footer_function_scope(): start = time() yield stop = time() print(f"test duration: {stop - start:0.3}") def test_1(): sleep(1) def test_2(): sleep(1.23) ================================================ FILE: books/pytest/tests/ch_03/test_count.py ================================================ from src import Card def test_empty(cards_db): assert cards_db.count() == 0 def test_two(cards_db): cards_db.add_card(Card("first")) cards_db.add_card(Card("second")) assert cards_db.count() == 2 def test_three(cards_db): cards_db.add_card(Card("first")) cards_db.add_card(Card("second")) cards_db.add_card(Card("three")) assert cards_db.count() == 3 ================================================ FILE: books/pytest/tests/ch_03/test_count_initial.py ================================================ from pathlib import Path from tempfile import TemporaryDirectory from src import CardsDB def test_empty(): with TemporaryDirectory() as db_dir: db_path = Path(db_dir) db = CardsDB(db_path) count = db.count() db.close() assert count == 0 ================================================ FILE: books/pytest/tests/ch_03/test_fixtures.py ================================================ import pytest @pytest.fixture() def some_data(): return 42 def test_some_data(some_data): assert some_data == 42 ================================================ FILE: books/pytest/tests/ch_03/test_rename_fixture.py ================================================ import pytest @pytest.fixture(name="ultimate_answer") def ultimate_answer_fixture(): return 42 def test_everything(ultimate_answer): assert ultimate_answer == 42 ================================================ FILE: books/pytest/tests/ch_03/test_some.py ================================================ import pytest @pytest.fixture(scope="function") def non_empty_db(cards_db, some_cards): for c in some_cards: cards_db.add_card(c) return cards_db def test_add_some(cards_db, some_cards): expected_count = len(some_cards) for c in some_cards: cards_db.add_card(c) assert cards_db.count() == 
def test_non_empty(non_empty_db):
    assert non_empty_db.count() > 0

================================================
FILE: books/pytest/tests/ch_04/conftest.py
================================================
import pytest

from src import CardsDB


@pytest.fixture(scope="session")
def db(tmp_path_factory):
    db_path = tmp_path_factory.mktemp("cards_db")
    _db = CardsDB(db_path)
    yield _db
    _db.close()

================================================
FILE: books/pytest/tests/ch_04/test_config.py
================================================
from typer.testing import CliRunner

import src as cards


def run_cards(*params):
    runner = CliRunner()
    result = runner.invoke(cards.app, params)
    return result.output.rstrip()


def test_run_cards():
    assert run_cards("version") == cards.__version__


def test_patch_get_path(monkeypatch, tmp_path):
    def fake_get_path():
        return tmp_path

    monkeypatch.setattr(cards.cli, "get_path", fake_get_path)
    assert run_cards("config") == str(tmp_path)


def test_patch_home(monkeypatch, tmp_path):
    full_cards_dir = tmp_path / "cards_db"

    def fake_home():
        return tmp_path

    monkeypatch.setattr(cards.cli.pathlib.Path, "home", fake_home)
    assert run_cards("config") == str(full_cards_dir)


def test_patch_env_var(monkeypatch, tmp_path):
    monkeypatch.setenv("CARDS_DB_DIR", str(tmp_path))
    assert run_cards("config") == str(tmp_path)

================================================
FILE: books/pytest/tests/ch_04/test_tmp.py
================================================
def test_tmp_path(tmp_path):
    file = tmp_path / "file.txt"
    file.write_text("Hello")
    assert file.read_text() == "Hello"


def test_tmp_path_factory(tmp_path_factory):
    path = tmp_path_factory.mktemp("sub")
    file = path / "file.txt"
    file.write_text("Hello")
    assert file.read_text() == "Hello"

================================================
FILE: books/pytest/tests/ch_04/test_version.py
================================================
from typer.testing import CliRunner

import src as cards


def test_version(capsys):
    cards.cli.version()
    output = capsys.readouterr().out.rstrip()
    assert output == cards.__version__


def test_version_v2():
    runner = CliRunner()
    result = runner.invoke(cards.app, ["version"])
    output = result.output.rstrip()
    assert output == cards.__version__

================================================
FILE: books/pytest/tests/ch_05/test_parametrize.py
================================================
import pytest

from src import (
    Card,
    CardsDB,
)


@pytest.fixture(scope="session")
def db(tmp_path_factory):
    db_path = tmp_path_factory.mktemp("cards_db")
    _db = CardsDB(db_path)
    yield _db
    _db.close()


@pytest.fixture(scope="function")
def cards_db(db):
    db.delete_all()
    return db


@pytest.mark.parametrize("initial_state", ["done", "in prog", "todo"])
def test_finish(cards_db, initial_state):
    c = Card("write a book", state=initial_state)
    index = cards_db.add_card(c)
    cards_db.finish(index)
    c = cards_db.get_card(index)
    assert c.state == "done"


@pytest.fixture(params=["done", "in prog", "todo"])
def start_state(request):
    return request.param


def test_finish_v2(cards_db, start_state):
    c = Card("write a book", state=start_state)
    index = cards_db.add_card(c)
    cards_db.finish(index)
    c = cards_db.get_card(index)
    assert c.state == "done"


def pytest_generate_tests(metafunc):
    if "start_state_2" in metafunc.fixturenames:
        metafunc.parametrize("start_state_2", ["done", "in prog", "todo"])


def test_finish_v3(cards_db, start_state_2):
    c = Card("write a book", state=start_state_2)
    index = cards_db.add_card(c)
    cards_db.finish(index)
    c = cards_db.get_card(index)
    assert c.state == "done"

================================================
FILE: books/pytest/tests/ch_06/pytest.ini
================================================
[pytest]
markers =
    smoke: subset of tests
    exception: check for expected exceptions
    custom: run only ch_06/custom
    num_cards: number of cards to prefill for cards_db fixture
addopts = --strict-markers

================================================
FILE: books/pytest/tests/ch_06/test_builtin.py
================================================
from pathlib import Path
from tempfile import TemporaryDirectory

import pytest
from packaging.version import parse

from src import (
    Card,
    CardsDB,
    api,
)


@pytest.mark.skip(reason="card doesn't support comparison yet")
def test_less_than_skip():
    assert Card("a task") < Card("b task")


@pytest.mark.skipif(
    parse(api.__version__).major < 2,
    reason="Card comparison not supported in 1.x",
)
def test_less_than_skipif():
    assert Card("a task") < Card("b task")


@pytest.mark.xfail(
    parse(api.__version__).major < 2,
    reason="Card comparison not supported in 1.x",
)
def test_less_than_xfail():
    assert Card("a task") < Card("b task")


@pytest.mark.xfail(reason="XPASS demo")
def test_xpass():
    assert Card("a task") == Card("a task")


@pytest.mark.xfail(reason="strict demo", strict=True)
@pytest.mark.skip
def test_xpass_strict():
    assert Card("a task") == Card("a task")

================================================
FILE: books/pytest/tests/ch_06/test_custom.py
================================================
import pytest

from src import (
    Card,
    CardsDB,
    InvalidCardId,
)

pytestmark = [pytest.mark.custom]


@pytest.fixture(scope="session")
def db(tmp_path_factory):
    db_path = tmp_path_factory.mktemp("cards_db")
    _db = CardsDB(db_path)
    yield _db
    _db.close()


@pytest.fixture(scope="function")
def cards_db(db):
    db.delete_all()
    return db


@pytest.mark.smoke
def test_start(cards_db):
    i = cards_db.add_card(Card("foo", state="todo"))
    cards_db.start(i)
    c = cards_db.get_card(i)
    assert c.state == "in prog"


@pytest.mark.exception
def test_start_non_existent(cards_db):
    with pytest.raises(InvalidCardId):
        cards_db.start(123)

================================================
FILE: books/pytest/tests/ch_06/text_combination.py
================================================
import pytest

from src import (
    Card,
    CardsDB,
)


@pytest.fixture(scope="session")
def db(tmp_path_factory):
    db_path = tmp_path_factory.mktemp("cards_db")
    _db = CardsDB(db_path)
    yield _db
    _db.close()


@pytest.fixture(scope="function")
def cards_db(db, request, faker):
    db.delete_all()
    faker.seed_instance(101)
    m = request.node.get_closest_marker("num_cards")
    if m and len(m.args) > 0:
        num_cards = m.args[0]
        for _ in range(num_cards):
            db.add_card(Card(summary=faker.sentence(), owner=faker.first_name()))
    return db


@pytest.mark.num_cards
def test_zero(cards_db):
    assert cards_db.count() == 0


@pytest.mark.num_cards(3)
def test_three(cards_db):
    assert cards_db.count() == 3

================================================
FILE: books/pytest/tests/ch_12/hello.py
================================================
def main():
    print("Hello world")


if __name__ == '__main__':
    main()

================================================
FILE: books/pytest/tests/ch_12/test_hello.py
================================================
from tests.ch_12 import hello


def test_hello(capsys):
    hello.main()
    output = capsys.readouterr().out
    assert output == "Hello world\n"

================================================
FILE: books/pytest/tests/ch_15/conftest.py
================================================
import pytest


def pytest_configure(config):
    config.addinivalue_line("markers", "slow: mark test as slow to run")


def pytest_addoption(parser):
    parser.addoption("--slow", action="store_true", help="include tests marked slow")


def pytest_collection_modifyitems(config, items):
    if not config.getoption("--slow"):
        skip_slow = pytest.mark.skip(reason="need --slow option to run")
        for item in items:
            if item.get_closest_marker("slow"):
                item.add_marker(skip_slow)

================================================
FILE: books/pytest/tests/ch_15/pytest.ini
================================================
[pytest]
markers =
    slow: mark test as slow to run

================================================
FILE: books/pytest/tests/ch_15/test_slow.py
================================================
import pytest


def test_normal():
    pass


@pytest.mark.slow
def test_slow():
    pass

================================================
FILE: books/python-architecture-patterns/Dockerfile
================================================
FROM python:3.10.2

WORKDIR /src
ENV PYTHONPATH "${PYTHONPATH}:/src"

COPY requirements.txt .
COPY setup.cfg .

RUN pip install -r requirements.txt

COPY src/ src/
COPY tests/ tests/

================================================
FILE: books/python-architecture-patterns/Makefile
================================================
test-flake8:
	docker-compose run --rm api flake8 .

test-mypy:
	docker-compose run --rm api mypy .

test-pytest:
	docker-compose run --rm api pytest .

================================================
FILE: books/python-architecture-patterns/docker-compose.yml
================================================
version: "3.9"
services:
  redis_pubsub:
    build:
      context: .
      dockerfile: Dockerfile
    image: allocation-image
    depends_on:
      - postgres
      - redis
      - mailhog
    environment:
      - DB_HOST=postgres
      - DB_PASSWORD=abc123
      - REDIS_HOST=redis
      - EMAIL_HOST=mailhog
      - PYTHONDONTWRITEBYTECODE=1
    volumes:
      - ./:/src
    entrypoint:
      - python
      - src/redis_consumer.py
  api:
    image: allocation-image
    build:
      context: .
      dockerfile: Dockerfile
    depends_on:
      - redis_pubsub
    volumes:
      - ./:/src
    environment:
      - DB_HOST=postgres
      - DB_PASSWORD=abc123
      - API_HOST=api
      - REDIS_HOST=redis
      - EMAIL_HOST=mailhog
      - PYTHONUNBUFFERED=1
      - PYTHONDONTWRITEBYTECODE=1
    command: uvicorn src.app:api --host 0.0.0.0 --port 80 --reload
    ports:
      - "5005:80"
  postgres:
    image: postgres:14.2
    environment:
      - POSTGRES_USER=allocation
      - POSTGRES_PASSWORD=abc123
    ports:
      - "54321:5432"
  redis:
    image: redis:alpine
    ports:
      - "63791:6379"
  mailhog:
    image: mailhog/mailhog
    ports:
      - "11025:1025"
      - "18025:8025"

================================================
FILE: books/python-architecture-patterns/notes.md
================================================
[go back](https://github.com/pkardas/learning)

# Architecture Patterns with Python: Enabling Test-Driven Development, Domain-Driven Design, and Event-Driven Microservices

Book by Harry Percival and Bob Gregory

Code here: [click](.)
- [Introduction](#introduction)
- [Chapter 1: Domain Modeling](#chapter-1-domain-modeling)
- [Chapter 2: Repository Pattern](#chapter-2-repository-pattern)
- [Chapter 3: On Coupling and Abstractions](#chapter-3-on-coupling-and-abstractions)
- [Chapter 4: FlaskAPI and Service Layer](#chapter-4-flaskapi-and-service-layer)
- [Chapter 5: TDD in High Gear and Low Gear](#chapter-5-tdd-in-high-gear-and-low-gear)
- [Chapter 6: Unit of Work Pattern](#chapter-6-unit-of-work-pattern)
- [Chapter 7: Aggregates and Consistency Boundaries](#chapter-7-aggregates-and-consistency-boundaries)
- [Chapter 8: Events and the Message Bus](#chapter-8-events-and-the-message-bus)
- [Chapter 9: Going to Town the Message Bus](#chapter-9-going-to-town-the-message-bus)
- [Chapter 10: Commands and Command Handler](#chapter-10-commands-and-command-handler)
- [Chapter 11: Event-Driven Architecture: Using Events to Integrate Microservices](#chapter-11-event-driven-architecture-using-events-to-integrate-microservices)
- [Chapter 12: Command-Query Responsibility Segregation (CQRS)](#chapter-12-command-query-responsibility-segregation-cqrs)
- [Chapter 13: Dependency Injection (and Bootstrapping)](#chapter-13-dependency-injection-and-bootstrapping)
- [Epilogue](#epilogue)
- [Appendix](#appendix)

## Introduction

Software systems tend toward chaos. When we first start building a new system, we have grand ideas that our code will be clean and well-ordered, but over time we find that it gathers cruft and edge cases and ends up a confusing morass of manager classes and util modules. Fortunately, the techniques to avoid creating a big ball of mud aren't complex.

Encapsulation covers two closely related ideas: simplifying behavior and hiding data. We encapsulate behavior by identifying a task that needs to be done in our code and giving that task a well-defined object or function. We call that object or function an abstraction. Encapsulating behavior by using abstractions is a powerful tool for making code more expressive, more testable, and easier to maintain.

Encapsulation and abstraction help us by hiding details and protecting the consistency of our data, but we also need to pay attention to the interactions between our objects and functions. When one function, module or object uses another, we say that the one depends on the other. Those dependencies form a kind of network or graph. For example: Presentation Layer -> Business Logic -> Database Layer. Layered architecture is the most common pattern for building business software.

The Dependency Inversion Principle:

1. High-level modules should not depend on low-level modules. Both should depend on abstractions.
2. Abstractions should not depend on details. Instead, details should depend on abstractions.

High-level modules are the code that your organization really cares about. The high-level modules of a software system are the functions, classes, and packages that deal with our real-world concepts. By contrast, low-level modules are the code that your organization doesn't care about. If payroll runs on time, your business is unlikely to care whether that is a cron job or a transient function running on Kubernetes.

> All problems in computer science can be solved by adding another level of indirection ~ David Wheeler

We don't want business logic changes to slow down because they are closely coupled to low-level infrastructure details. Adding an abstraction between them allows the two to change independently of each other (see the sketch below).
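A minimal sketch of the DIP in Python (all names hypothetical): the high-level policy and the low-level detail both depend on an abstraction:

```python
import abc


class NotificationSender(abc.ABC):
    """The abstraction both sides depend on."""

    @abc.abstractmethod
    def send(self, destination: str, message: str) -> None:
        ...


class EmailSender(NotificationSender):
    """Low-level detail - depends on the abstraction, not the other way around."""

    def send(self, destination: str, message: str) -> None:
        print(f"email to {destination}: {message}")


def notify_out_of_stock(sender: NotificationSender, sku: str) -> None:
    # high-level code knows nothing about SMTP, only the abstraction
    sender.send("stock@example.com", f"Out of stock: {sku}")


notify_out_of_stock(EmailSender(), "RED-CHAIR")
```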
## Chapter 1: Domain Modeling

The _domain_ is a fancy word for saying _the problem you are trying to solve_. A _model_ is a map of a process or phenomenon that captures a useful property.

In a nutshell, DDD says that the most important thing about software is that it provides a useful model of a problem. If we get that model right, our software delivers value and makes new things possible.

When we hear our business stakeholders using unfamiliar words, or using terms in a specific way, we should listen to understand the deeper meaning and encode their hard-won experience into our software.

Choose memorable identifiers for our objects so that the examples are easier to talk about.

Whenever we have a business concept that has data but no identity, we often choose to represent it using the Value Object pattern. A value object is any domain object that is uniquely identified by the data it holds; we usually make such objects immutable. Named tuples and frozen data classes are a great tool for this.

Entities, unlike values, have identity equality. We can change their values, and they are still recognizably the same thing. Batches, in our example, are entities. We can allocate lines to a batch, or change the date that we expect it to arrive, and it will still be the same entity. We usually make this explicit in code by implementing equality operators on entities.

For value objects, the hash should be based on all attributes, and we should ensure that the objects are immutable. For entities, the simplest option is to say that the hash is None, meaning that the object is not hashable and cannot, for example, be used in a set. If for some reason you decide to use set or dict operations with entities, the hash should be based on the attributes that define the entity's unique identity over time.

Exceptions can express domain concepts too.

## Chapter 2: Repository Pattern

Repository Pattern - a simplifying abstraction over data storage, allowing us to decouple our model layer from the data layer. This simplifying abstraction makes our system more testable by hiding the complexities of the database. It hides the boring details of data access by pretending that all of our data is in memory. This pattern is very common in DDD.

Layered architecture is a common approach to structuring a system that has a UI, some logic, and a database. Onion architecture - the model is on the inside, and dependencies flow inward to it.

The ORM gives us persistence ignorance - the fancy model doesn't need to know anything about how data is loaded or persisted. Using an ORM is already an example of the DIP. Instead of depending on hardcoded SQL, we depend on an abstraction - the ORM.

The simplest repository has just two methods:

- add - to put a new item in the repository
- get - to return a previously added item.

One of the biggest benefits of the Repository pattern is the possibility to build a fake repository.

> Building fakes for your abstractions is an excellent way to get design feedback: if it's hard to fake, the abstraction
> is probably too complicated.

A simple CRUD wrapper around a database doesn't need a domain model or a repository.

Repository Pattern Recap:

- _Apply dependency inversion to your ORM_ - The domain model should be free of infrastructure concerns, so your ORM should import your model, and not the other way around.
- _The Repository pattern is a simple abstraction around permanent storage_ - The repository gives you the illusion of a collection of in-memory objects. It makes it easy to create a FakeRepository for testing and to swap fundamental details of your infrastructure without disrupting your application code.
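A minimal sketch of that two-method interface plus an in-memory fake (the `Product` stand-in is illustrative, not the book's full model):

```python
from dataclasses import dataclass
from typing import Optional, Protocol


@dataclass
class Product:
    sku: str


class AbstractRepository(Protocol):
    def add(self, product: Product) -> None: ...
    def get(self, sku: str) -> Optional[Product]: ...


class FakeRepository:
    """Working in-memory implementation, designed for use only in tests."""

    def __init__(self) -> None:
        self._products: dict[str, Product] = {}

    def add(self, product: Product) -> None:
        self._products[product.sku] = product

    def get(self, sku: str) -> Optional[Product]:
        return self._products.get(sku)


repo = FakeRepository()
repo.add(Product(sku="RED-CHAIR"))
assert repo.get("RED-CHAIR") == Product(sku="RED-CHAIR")
```

Because the interface is so small, the fake stays trivial, which is exactly the design feedback the quote above is pointing at.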
## Chapter 3: On Coupling and Abstractions

When we are unable to change component A for fear of breaking component B, we say that the components have become coupled. Globally, coupling increases the risk and the cost of changing our code, sometimes to the point where we feel unable to make any changes at all. We can reduce the degree of coupling within a system by abstracting away the details.

According to the authors, it is better to use fake resources instead of mocks:

- Mocks are used to verify how something gets used; they have methods like `assert_called_once_with`. They are associated with London-school TDD.
- Fakes are working implementations of the thing they are replacing, but they are designed for use only in tests. They wouldn't work in real life. You can use them to make assertions about the end state of a system rather than the behaviours along the way, so they are associated with classic-style TDD.

TDD is a design practice first and a testing practice second. The tests act as a record of our design choices and serve to explain the system to us when we return to the code after a long absence. Tests that use too many mocks get overwhelmed with setup code that hides the story we care about.

Links:

- [YOW! Conference 2017 - Steve Freeman - Test Driven Development: That’s Not What We Meant](https://www.youtube.com/watch?v=B48Exq57Zg8)
- [Edwin Jung - Mocking and Patching Pitfalls](https://www.youtube.com/watch?v=Ldlz4V-UCFw)

## Chapter 4: FlaskAPI and Service Layer

Service Layer - extract logic from the endpoint, because it might be doing too much: validating input, handling errors, committing. Our high-level module, the service layer, depends on the repository abstraction. And the details of the implementation for our specific choice of persistent storage also depend on the same abstraction.

The responsibilities of the ~~Flask~~ FastAPI app are just standard web stuff - per-request session management, parsing information out of POST parameters, response status codes and JSON. All the orchestration logic is in the use case/service layer, and the domain logic stays in the domain.

Application service - its job is to handle requests from the outside world and to orchestrate an operation. It drives the application by following a bunch of simple steps:

- Get some data from the database
- Update the domain model
- Persist any changes

This is the kind of boring work that has to happen for every operation in your system, and keeping it separate from business logic helps to keep things tidy.

Domain service - this is the name for a piece of logic that belongs in the domain model but doesn't sit naturally inside a stateful entity or value object.

## Chapter 5: TDD in High Gear and Low Gear

Once you implement domain modeling and the service layer, you really can get to a stage where unit tests outnumber integration and end-to-end tests by an order of magnitude.

Tests are supposed to help us change our system fearlessly, but often we see teams writing too many tests against their domain model. This causes problems when they come to change their codebase and find that they need to update tens or even hundreds of unit tests.

The service layer forms an API for our system that we can drive in multiple ways. Testing against this API reduces the amount of code that we need to change when we refactor our domain model.
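For reference, a service-layer function is pure orchestration; a minimal sketch under assumed names (the book's version differs in detail, e.g. it builds an order line for the domain model):

```python
class InvalidSku(Exception):
    pass


def allocate(order_id: str, sku: str, qty: int, repo, session) -> str:
    """Application service: fetch data, delegate to the domain model, persist."""
    product = repo.get(sku)  # get some data from the database
    if product is None:
        raise InvalidSku(f"Invalid SKU: {sku}")
    batch_ref = product.allocate(order_id, qty)  # update the domain model
    session.commit()  # persist any changes
    return batch_ref
```

Because it takes primitives and abstract collaborators, a test can drive it with a fake repository and a fake session; no web framework or real database required.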
If we restrict ourselves to testing only against the service layer, we will not have any tests that directly interact with "private" methods or attributes on our model objects, which leaves us freer to refactor them.

Most of the time, when we are adding a new feature or fixing a bug, we don't need to make extensive changes to the domain model. In these cases, we prefer to write tests against the service layer because of the lower coupling and higher coverage. When starting a new project or when hitting a particularly gnarly problem, we will drop back down to writing tests against the domain model, so we get better feedback and executable documentation of our intent.

Metaphor of shifting gears - when starting a journey, the bicycle needs to be in a low gear, so it can overcome inertia. Once we are off and running, we can go faster and more efficiently by changing into a high gear. But if we suddenly encounter a steep hill or are forced to slow down by a hazard, we again drop to a low gear until we can pick up speed again.

Rules of Thumb for Different Types of Test:

1. Aim for one end-to-end test per feature - the objective is to demonstrate that the feature works, and that all the moving parts are glued together.
2. Write the bulk of your tests against the service layer - these tests offer a good trade-off between coverage, runtime, and efficiency.
3. Maintain a small core of tests written against your domain model - these tests have highly focused coverage and are more brittle, but they provide the best feedback. Don't be afraid to delete these tests if the functionality is later covered by tests at the service layer.
4. Error handling counts as a feature - ideally, your application will be structured such that all errors that bubble up to your entrypoints are handled in the same way. This means you need to test only the happy path for each feature, and to reserve one end-to-end test for all unhappy paths.

Express your service layer in terms of primitives rather than domain objects.

## Chapter 6: Unit of Work Pattern

If the Repository pattern is our abstraction over persistent storage, the Unit of Work pattern is our abstraction over the idea of atomic operations. It will allow us to decouple our service layer from the data layer. The Unit of Work acts as a single entrypoint to our persistent storage, and it keeps track of what objects were loaded and of the latest state. The Unit of Work and Repository classes are collaborators.

> Don't mock what you don't own

A rule of thumb that forces us to build these simple abstractions over messy subsystems. This encourages us to think carefully about our designs.

It is better to require an explicit commit, so we can choose when to flush state. The default behaviour is to not change anything, which makes the software safe by default. There is one code path that leads to changes in the system: total success and an explicit commit. Any other code path, any exception, any early exit from the UoW's scope leads to a safe state.

You should always feel free to throw away tests if you think they are not going to add value longer term.

SQLAlchemy already uses a Unit of Work in the shape of the Session object (it tracks changes to entities, and when the session is flushed, all the changes are persisted together). Then, why bother? The Session API is very rich; our Unit of Work can simplify the session to its essential core: start, commit or throw away. Besides, our Unit of Work can access the Repository object.
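A minimal sketch of that "safe by default, explicit commit" shape as a context manager (a fake in the spirit of the book's tests; a real implementation would wrap a database session):

```python
class FakeUnitOfWork:
    """Safe by default: nothing is persisted unless commit() is called."""

    def __init__(self) -> None:
        self.committed = False

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        if not self.committed:
            self.rollback()  # any exception or early exit lands here

    def commit(self) -> None:
        self.committed = True

    def rollback(self) -> None:
        pass  # a real UoW would discard the session's pending changes


with FakeUnitOfWork() as uow:
    # ... load aggregates through the UoW's repositories and mutate them ...
    uow.commit()  # the single code path that changes the system
```

Note that `__exit__` does not swallow exceptions, so a failing block both rolls back and still raises.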
Unit of Work Pattern Recap:

- _The Unit of Work pattern is an abstraction around data integrity_ - It helps to enforce the consistency of our domain model, and improves performance, by letting us perform a single flush operation at the end of an operation.
- _It works closely with the Repository and Service Layer patterns_ - The Unit of Work pattern completes our abstractions over data access by representing atomic updates. Each of our service-layer use cases runs in a single unit of work that succeeds or fails as a block.
- _This is a lovely case for a context manager_ - Context managers are an idiomatic way of defining scope in Python. We can use a context manager to automatically roll back our work at the end of a request, which means the system is safe by default.
- _SQLAlchemy already implements this pattern_ - We introduce an even simpler abstraction over the SQLAlchemy Session object in order to "narrow" the interface between the ORM and our code. This helps to keep us loosely coupled.

## Chapter 7: Aggregates and Consistency Boundaries

A constraint is a rule that restricts the possible states our model can get into, while an invariant is defined a little more precisely as a condition that is always true.

The Aggregate pattern - a design pattern from the DDD community that helps us to solve concurrency issues. An aggregate is just a domain object that contains other domain objects and lets us treat the whole collection as a single unit.

> An aggregate is a cluster of associated objects that we treat as a unit for the purpose of data changes.

We have to choose the right granularity for our aggregate. Candidates: Shipment, Cart, Stock, Product.

Bounded contexts were invented as a reaction against attempts to capture entire businesses in a single model. Attributes needed in one context are irrelevant in another. Concepts with the same name can have entirely different meanings in different contexts. It is better to have several models, draw boundaries around each context, and handle the translation between different contexts explicitly. This concept translates very well to the world of microservices, where each microservice is free to have its own concept of "customer" and its own rules for translating that to and from other microservices it integrates with.

Aggregates should be the only way to get to our model. The Aggregate pattern is designed to help manage some technical constraints around consistency and performance.

Version numbers are just one way to implement optimistic locking (sketched after the recap below). Optimistic - our default assumption is that everything will be fine when two users want to make changes to the database. We think it is unlikely that they will conflict with each other. We let them go ahead and just make sure we have a way to notice if there is a problem. The usual way to handle a failure is to retry the operation from the beginning.

Pessimistic - works under the assumption that two users are going to cause conflicts, and we want to prevent conflicts in all cases, so we lock everything just to be safe. In our example, that would mean locking the whole `batches` table or using `SELECT FOR UPDATE`. With pessimistic locking, you don't need to think about handling failures because the database will prevent them.

Aggregates and Consistency Boundaries Recap:

- _Aggregates are your entrypoints into the domain model_ - By restricting the number of ways that things can be changed, we make the system easier to reason about.
- _Aggregates are in charge of consistency boundaries_ - An aggregate's job is to manage our business rules about invariants as they apply to a group of related objects. It is the aggregate's job to check that the objects within its remit are consistent with each other and with our rules, and to reject changes that would break the rules.
- _Aggregates and concurrency issues go together_ - When thinking about implementing these consistency checks, we end up thinking about transactions and locks. Choosing the right aggregate is about performance as well as the conceptual organization of your domain.
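The version-number flavour of optimistic locking, sketched (the table and column names are illustrative, and `session` is assumed to be a SQLAlchemy-style session; the book's own code leans on the database's REPEATABLE READ isolation level instead):

```python
class ConcurrencyError(Exception):
    pass


# Compare-and-swap on the version column: the UPDATE matches zero rows
# if another transaction bumped the version after we read it.
UPDATE_SQL = """
UPDATE products
SET version_number = :new_version
WHERE sku = :sku AND version_number = :old_version
"""


def commit_allocation(session, sku: str, read_version: int) -> None:
    result = session.execute(
        UPDATE_SQL,
        dict(sku=sku, old_version=read_version, new_version=read_version + 1),
    )
    if result.rowcount == 0:  # somebody else won the race
        raise ConcurrencyError("could not update; retry from the beginning")
```

Either way, the failure mode is the same: notice the conflict, give up cleanly, and retry the whole operation.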
## Chapter 8: Events and the Message Bus

Reporting, permissions and workflows touching zillions of objects can make a mess of our codebase.

> Rule of thumb: if you can't describe what your function does without using words like "then" or "and", you might be
> violating the SRP.

A message bus gives us a nice way to separate responsibilities when we have to take multiple actions in response to a request.

Domain Events and the Message Bus Recap:

- _Events can help with the single responsibility principle_ - Code gets tangled up when we mix multiple concerns in one place. Events can help us to keep things tidy by separating primary use cases from secondary ones. We also use events for communicating between aggregates so that we don't need to run long-running transactions that lock against multiple tables.
- _A message bus routes messages to handlers_ - You can think of a message bus as a dict that maps from events to their consumers. It doesn't "know" anything about the meaning of events; it is just a piece of dumb infrastructure for getting messages around the system.
- _Option 1: Service layer raises events and passes them to the message bus_ - The simplest way to start using events in your system is to raise them from handlers by calling `bus.handle(event)` after you commit your unit of work.
- _Option 2: Domain model raises events, service layer passes them to the message bus_ - The logic about when to raise an event really should live with the model, so we can improve our system's design and testability by raising events from the domain model. It is easy for our handlers to collect events off the model objects after commit and pass them to the bus.
- _Option 3: UoW collects events from aggregates and passes them to the message bus_ - Adding `bus.handle(aggregate.events)` to every handler is annoying, so we can tidy up by making our unit of work responsible for raising events that were raised by loaded objects. This is the most complex design and might rely on ORM magic, but it is clean and easy to use once set up.

## Chapter 9: Going to Town on the Message Bus

If we rethink our API calls as capturing events, the service-layer functions can be event handlers too, and we no longer need to make a distinction between internal and external event handlers.

Multiple database transactions can cause integrity issues: something could happen that means the first transaction completes but the second one does not.

Events are simple dataclasses that define the data structures for inputs and internal messages within our system. This is quite powerful from a DDD standpoint, since events often translate very well into business language.

Handlers are the way we react to events. They can call down to our model or call out to external services. We can define multiple handlers for a single event if we want to. Handlers can also raise other events. This allows us to be very granular about what a handler does and really stick to the SRP.
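Putting that "dict from events to consumers" picture into code, a minimal sketch (the event type and handler are illustrative):

```python
from dataclasses import dataclass
from typing import Callable, Dict, List, Type


class Event:
    pass


@dataclass
class OutOfStock(Event):
    sku: str


def send_out_of_stock_notification(event: OutOfStock) -> None:
    print(f"out of stock: {event.sku}")


# The bus "is" this dict: it routes messages and knows nothing about their meaning.
HANDLERS: Dict[Type[Event], List[Callable]] = {
    OutOfStock: [send_out_of_stock_notification],
}


def handle(event: Event) -> None:
    for handler in HANDLERS[type(event)]:
        handler(event)


handle(OutOfStock(sku="RED-CHAIR"))
```

Adding a second reaction to the same event is then just appending another handler to the list; no existing code changes.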
## Chapter 10: Commands and Command Handler

Commands are a type of message - instructions sent by one part of a system to another. We usually represent commands with dumb data structures and can handle them in much the same way as events.

Commands are sent by one actor to another specific actor with the expectation that a particular thing will happen as a result. When we post a form to an API handler, we are sending a command. We name commands with imperative mood verb phrases like "allocate stock" or "delay shipment".

Events are broadcast by an actor to all interested listeners. We often use events to spread the knowledge about successful commands. We name events with past-tense verb phrases like "order allocated to stock" or "shipment delayed".

How do we mitigate the problems caused by lost messages? The system might be left in an inconsistent state. In our allocation service we have already taken steps to prevent that from happening: we have carefully identified aggregates that act as consistency boundaries, and we have introduced a UoW that manages the atomic success or failure of an update to an aggregate.

When a user wants to make the system do something, we represent their request as a command. That command should modify a single aggregate and either succeed or fail in totality. Any other bookkeeping, cleanup and notification we need to do can happen via an event. We don't require the event handlers to succeed in order for the command to be successful. We raise events about an aggregate after we persist our state to the database. It is OK for events to fail independently from the commands that raised them.

Tenacity is a Python library that implements common patterns for retrying.

## Chapter 11: Event-Driven Architecture: Using Events to Integrate Microservices

Often, the first instinct when migrating an existing application to microservices is to split the system into _nouns_. This style of architecture, where we create a microservice per database table and treat our HTTP APIs as CRUD interfaces to anemic models, is the most common initial way for people to approach service-oriented design. It works fine for systems that are very simple, but it can quickly degrade into a distributed ball of mud.

When two things have to be changed together, we say that they are coupled. We can never completely avoid coupling, except by having our software not talk to any other software. What we want is to avoid inappropriate coupling.

How do we get appropriate coupling? We should think in terms of verbs, not nouns. Our domain model is about modeling a business process. It is not static data about a thing; it is a model of a verb. Instead of thinking about a system for orders and a system for batches, we think about a system for allocating and ordering.

Microservices should be consistency boundaries. That means we don't need to rely on synchronous calls. Each service accepts commands from the outside world and raises events to record the result. Other services can listen to those events to trigger the next steps in the workflow.

Things can fail independently, so it is easier to handle degraded behavior - we can still take orders if the allocation service is having a bad day. Secondly, we are reducing the strength of coupling between our systems. If we need to change the order of operations or to introduce new steps in the process, we can do that locally.

Events can come from the outside, but they can also be published externally.

> Event notification is nice because it implies a low level of coupling, and is pretty simple to set up. It can become
> problematic, however, if there really is a logical flow that runs over various event notifications. It can be hard to
> see such flow as it is not explicit in any program text. This can make it hard to debug and modify. ~ Martin Fowler.
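Publishing an event externally can be as thin as a pub/sub call; a minimal sketch in the spirit of this repo's `redis_publisher` (the host, port, channel and payload are illustrative, and a reachable Redis is assumed):

```python
import json

import redis  # third-party client, as used elsewhere in this repo

r = redis.Redis(host="localhost", port=6379)


def publish(channel: str, event: dict) -> None:
    # Raise an event to record a result; interested services subscribe and react.
    r.publish(channel, json.dumps(event))


publish("line_allocated", {"order_id": "o1", "sku": "RED-CHAIR", "batch_ref": "b1"})
```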
## Chapter 12: Command-Query Responsibility Segregation (CQRS)

Reads (queries) and writes (commands) are different, so they should be treated differently. Most users are not going to buy your product; they are just viewers. We can make reads eventually consistent in order to make them perform better.

All distributed systems are inconsistent. As soon as you have a web server and two customers, you have the potential for stale data. No matter what we do, we are always going to find that our software systems are inconsistent with reality, so we will always need business processes to cope with these edge cases. It is OK to trade performance for consistency on the read side, because stale data is essentially unavoidable.

- Reads: simple, highly cacheable, can be stale.
- Writes: complex business logic, uncacheable, must be transactionally consistent.

Post/Redirect/Get pattern - in this technique, a web endpoint accepts an HTTP POST and responds with a redirect to see the result. For example, we might accept a POST to /batches to create a new batch and redirect the user to /batches/123 to see their newly created batch. This approach fixes the problems that arise when users refresh the results page in their browser: refreshing can lead to users double-submitting data and thus buying two sofas when they needed only one. This technique is a simple example of CQS. In CQS we follow one simple rule: functions should either modify state or answer questions. We can apply the same design by returning 201 Created or 202 Accepted, with a Location header containing the URI of our new resources.

An ORM can expose us to performance problems. The SELECT N+1 problem is a common performance problem with ORMs: when retrieving a list of objects, your ORM will often perform an initial query to get all the IDs of the objects it needs, and then issue individual queries for each object to retrieve their attributes. This is especially likely if there are any foreign-key relationships on your objects.

Even with well-tuned indexes, a relational database uses a lot of CPU to perform joins. The fastest queries will always be `SELECT * FROM table WHERE condition`. More than raw speed, this approach buys us scale: read-only stores can be horizontally scaled out. The read model can even be implemented using Redis. As the domain model becomes richer and more complex, a simplified read model becomes compelling.

## Chapter 13: Dependency Injection (and Bootstrapping)

Mocks tightly couple us to the implementation. By choosing to monkeypatch `email.send_mail`, we are tied to doing `import email`, and if we ever want to do `from email import send_mail`, we will have to change all our mocks.

Declaring explicit dependencies is, strictly speaking, unnecessary, and using them makes our application code marginally more complex. But in return we get tests that are easier to write and manage.

> Explicit is better than implicit.

Putting all the responsibility for passing dependencies to the right handler onto the message bus feels like a violation of the SRP. Instead, we will reach for a pattern called Composition Root (a bootstrap script), and we will do a bit of "manual DI" (dependency injection without a framework).
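A minimal sketch of manual DI through a composition root (the handler and fake are illustrative; this repo's `bootstrap.py` does the same thing generically with `inspect.signature`):

```python
from functools import partial


class FakeNotifications:
    def __init__(self) -> None:
        self.sent: list[tuple[str, str]] = []

    def send(self, destination: str, message: str) -> None:
        self.sent.append((destination, message))


def send_out_of_stock_notification(event: dict, notifications) -> None:
    notifications.send("stock@made.com", f"Out of stock for {event['sku']}")


def bootstrap(notifications):
    # Composition root: build each handler once, with its dependencies baked in.
    return {
        "out_of_stock": partial(send_out_of_stock_notification, notifications=notifications),
    }


notifications = FakeNotifications()
handlers = bootstrap(notifications)
handlers["out_of_stock"]({"sku": "RED-CHAIR"})
assert notifications.sent == [("stock@made.com", "Out of stock for RED-CHAIR")]
```

Swapping the fake for a real email adapter happens in exactly one place: the call to `bootstrap`.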
Setting up dependency injection is just one of many typical setup activities that you need to do when starting your app. Putting this all together into a bootstrap script is often a good idea. The bootstrap script is also a good place to provide sensible default configuration for your adapters, and a single place to override those adapters with fakes for your tests.

## Epilogue

Making complex changes to a system is often an easier sell if you link it to feature work. Perhaps you are launching a new product or opening your service to new markets? This is the right time to spend engineering resources on fixing the foundations. With a six-month project to deliver, it is easier to make the argument for three weeks of cleanup work.

The Strangler Fig pattern involves creating a new system around the edges of an old system, while keeping the old one running. Bits of old functionality are gradually intercepted and replaced, until the old system is left doing nothing at all and can be switched off.

Focus on a specific problem and ask yourself how you can put the relevant ideas to use, perhaps in an initially limited and imperfect fashion.

Reliable messaging is hard: Redis pub/sub is not reliable and should not be used as a general-purpose messaging tool. We explicitly choose small, focused transactions that can fail independently.

## Appendix

- Entity - A domain object whose attributes may change but that has a recognizable identity over time.
- Value object - An immutable domain object whose attributes entirely define it. It is fungible with other identical objects.
- Aggregate - A cluster of associated objects that we treat as a unit for the purpose of data changes. Defines and enforces a consistency boundary.
- Event - Represents something that happened.
- Command - Represents a job the system should perform.
- Unit of work - An abstraction around data integrity. Each unit of work represents an atomic update. Makes the repository available. Tracks new events on retrieved aggregates.
- Repository - An abstraction around persistent storage. Each aggregate has its own repository.

Docker: mounting our source and test code as `volumes` means we don't need to rebuild our containers every time we make a code change.

Postel's Law (robustness principle):

> Be liberal in what you accept, and conservative in what you emit

Tolerant Reader pattern: validate as little as possible. Read only the fields you need, and don't overspecify their contents. This will help your system stay robust when other systems change over time. Resist the temptation to share message definitions between systems: instead, make it easy to define the data you depend on. A tolerant reader is sketched below.

If you are in charge of an API that is open to the public on the big bad internet, there might be good reasons to be more conservative about what inputs you allow. If validation is needed, do it at the edge of the system in order to avoid polluting the domain model. Bear in mind that invalid data wandering through your system is a time bomb; the deeper it gets, the more damage it can do.
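A minimal tolerant-reader sketch (the message type and field names are illustrative): deserialize only the fields this system depends on and ignore everything else, so senders can evolve their messages without breaking us.

```python
import json
from dataclasses import dataclass


@dataclass
class BatchQuantityChanged:
    batch_ref: str
    qty: int


def read_message(raw: str) -> BatchQuantityChanged:
    data = json.loads(raw)
    # Read only the fields we depend on; unknown fields are ignored, not rejected.
    return BatchQuantityChanged(batch_ref=data["batch_ref"], qty=int(data["qty"]))


msg = read_message('{"batch_ref": "b1", "qty": 5, "field_we_do_not_know": true}')
assert msg == BatchQuantityChanged(batch_ref="b1", qty=5)
```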
================================================ FILE: books/python-architecture-patterns/requirements.txt ================================================ pytest==6.2.5 mypy==0.931 flake8==4.0.1 SQLAlchemy==1.4.31 fastapi==0.73.0 sqlmodel==0.0.6 requests==2.27.1 psycopg2==2.9.3 uvicorn==0.17.4 redis==4.1.4 types-redis==4.1.17 tenacity==8.0.1 ================================================ FILE: books/python-architecture-patterns/setup.cfg ================================================ [tool:pytest] python_paths = . testpaths = tests norecursedirs = .* addopts = -sl filterwarnings = ignore::DeprecationWarning ignore::PendingDeprecationWarning [mypy] python_version = 3.10 ignore_missing_imports = True strict_optional = False [mypy-app.cache] ignore_errors = True [flake8] max-line-length = 180 max-complexity = 10 format = pylint show-source = True statistics = True ================================================ FILE: books/python-architecture-patterns/src/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/src/adapters/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/src/adapters/notifications.py ================================================ from abc import ( ABC, abstractmethod, ) import smtplib from src import config DEFAULT_HOST = config.get_email_host_and_port()["host"] DEFAULT_PORT = config.get_email_host_and_port()["port"] class AbstractNotifications(ABC): @abstractmethod def send(self, destination, message): raise NotImplementedError class EmailNotifications(AbstractNotifications): def __init__(self, smtp_host=DEFAULT_HOST, port=DEFAULT_PORT): self.server = smtplib.SMTP(smtp_host, port=port) self.server.noop() def send(self, destination, message): self.server.sendmail( from_addr="allocations@example.com", to_addrs=[destination], msg=f"Subject: allocation service notification\n{message}", ) ================================================ FILE: books/python-architecture-patterns/src/adapters/orm.py ================================================ from sqlmodel import ( Field, SQLModel, ) class AllocationsView(SQLModel, table=True): id: int = Field(primary_key=True) order_id: str sku: str batch_ref: str def create_db_and_tables(engine): SQLModel.metadata.create_all(engine) def clean_db_and_tables(engine): SQLModel.metadata.drop_all(engine) ================================================ FILE: books/python-architecture-patterns/src/adapters/redis_publisher.py ================================================ from redis.client import Redis from src import config from src.domain.events import Event r = Redis(**config.get_redis_host_and_port()) def publish(channel: str, event: Event): r.publish(channel, event.json()) ================================================ FILE: books/python-architecture-patterns/src/adapters/repository.py ================================================ from typing import ( Optional, Protocol, Set, ) from sqlmodel import ( Session, select, ) from src.domain.model import ( Batch, Product, ) class AbstractRepository(Protocol): def add(self, product: Product): ... def get(self, sku: str) -> Optional[Product]: ... def get_by_batch_ref(self, ref: str) -> Optional[Product]: ... 
class Repository(AbstractRepository): def __init__(self, session: Session): self.session = session def add(self, product: Product): self.session.add(product) self.session.commit() def get(self, sku: str) -> Optional[Product]: return self.session.exec(select(Product).where(Product.sku == sku)).first() def get_by_batch_ref(self, ref: str) -> Optional[Product]: return self.session.exec(select(Product).join(Batch).where(Batch.reference == ref)).first() class TrackingRepository(AbstractRepository): seen: Set[Product] def __init__(self, repo: AbstractRepository): super().__init__() self.seen = set() self._repo = repo def add(self, product: Product): self._repo.add(product) self.seen.add(product) def get(self, sku: str) -> Optional[Product]: product = self._repo.get(sku) if product: self.seen.add(product) return product def get_by_batch_ref(self, ref: str) -> Optional[Product]: if product := self._repo.get_by_batch_ref(ref): self.seen.add(product) return product ================================================ FILE: books/python-architecture-patterns/src/app.py ================================================ from fastapi import ( FastAPI, Response, status, ) from src import views from src.bootstrap import bootstrap from src.domain import commands from src.domain.model import ( Batch, OrderLine, OutOfStock, ) from src.service_layer.handlers import InvalidSku bus = bootstrap() api = FastAPI() @api.post("/allocate") async def allocate_endpoint(order_line: OrderLine, response: Response): try: bus.handle(commands.Allocate(order_id=order_line.order_id, sku=order_line.sku, qty=order_line.qty)) except (OutOfStock, InvalidSku) as e: response.status_code = status.HTTP_400_BAD_REQUEST return {"message": str(e)} return {"message": "ok"} @api.post("/add_batch") async def add_batch_endpoint(batch: Batch): bus.handle(commands.CreateBatch(ref=batch.reference, sku=batch.sku, qty=batch.purchased_quantity, eta=batch.eta)) return {"message": "ok"} @api.post("/allocate/{order_id}") async def allocate_view_endpoint(order_id: str, response: Response): if result := views.allocations(order_id, bus.uow): return result response.status_code = status.HTTP_400_BAD_REQUEST return response ================================================ FILE: books/python-architecture-patterns/src/bootstrap.py ================================================ import inspect from typing import Callable from sqlalchemy.engine import Engine from sqlmodel import create_engine from src import config from src.adapters import redis_publisher from src.adapters.notifications import ( AbstractNotifications, EmailNotifications, ) from src.adapters.orm import create_db_and_tables from src.service_layer.message_bus import ( COMMAND_HANDLERS, EVENT_HANDLERS, MessageBus, ) from src.service_layer.unit_of_work import ( AbstractUnitOfWork, UnitOfWork, ) def bootstrap(start_orm: bool = True, engine: Engine = create_engine(config.get_postgres_uri()), uow: AbstractUnitOfWork = UnitOfWork(), notifications: AbstractNotifications = EmailNotifications(), publish: Callable = redis_publisher.publish): if start_orm: create_db_and_tables(engine) dependencies = {"uow": uow, "notifications": notifications, "publish": publish} injected_event_handlers = { event_type: [ inject_dependencies(handler, dependencies) for handler in event_handlers ] for event_type, event_handlers in EVENT_HANDLERS.items() } injected_command_handlers = { command_type: inject_dependencies(handler, dependencies) for command_type, handler in COMMAND_HANDLERS.items() } return MessageBus(uow=uow, 
event_handlers=injected_event_handlers, command_handlers=injected_command_handlers) def inject_dependencies(handler, dependencies): params = inspect.signature(handler).parameters deps = { name: dependency for name, dependency in dependencies.items() if name in params } return lambda message: handler(message, **deps) ================================================ FILE: books/python-architecture-patterns/src/config.py ================================================ import os def get_postgres_uri(): host = os.environ.get("DB_HOST", "localhost") port = 54321 if host == "localhost" else 5432 password = os.environ.get("DB_PASSWORD", "abc123") user, db_name = "allocation", "allocation" return f"postgresql://{user}:{password}@{host}:{port}/{db_name}" def get_api_url(): host = os.environ.get("API_HOST", "localhost") port = 80 return f"http://{host}:{port}" def get_redis_host_and_port(): host = os.environ.get("REDIS_HOST", "localhost") port = 63791 if host == "localhost" else 6379 return dict(host=host, port=port) def get_email_host_and_port(): host = os.environ.get("EMAIL_HOST", "localhost") port = 11025 if host == "localhost" else 1025 http_port = 18025 if host == "localhost" else 8025 return dict(host=host, port=port, http_port=http_port) ================================================ FILE: books/python-architecture-patterns/src/domain/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/src/domain/commands.py ================================================ from dataclasses import dataclass from datetime import date from typing import Optional class Command: pass @dataclass class Allocate(Command): order_id: str sku: str qty: int @dataclass class CreateBatch(Command): ref: str sku: str qty: int eta: Optional[date] = None @dataclass class ChangeBatchQuantity(Command): ref: str qty: int ================================================ FILE: books/python-architecture-patterns/src/domain/events.py ================================================ from pydantic import BaseModel class Event(BaseModel): pass class OutOfStock(Event): sku: str class Allocated(Event): order_id: str sku: str qty: int batch_ref: str class Deallocated(Event): order_id: str sku: str qty: int class BatchQuantityChanged(Event): batch_ref: str qty: int ================================================ FILE: books/python-architecture-patterns/src/domain/model.py ================================================ from datetime import date from typing import ( Iterable, List, Optional, Union, cast, ) from pydantic import PrivateAttr from pydantic.fields import ModelPrivateAttr from sqlmodel import ( Field, Relationship, SQLModel, ) from src.domain import ( commands, events, ) Message = Union[commands.Command, events.Event] class OutOfStock(Exception): pass class OrderLine(SQLModel, table=True): order_id: str sku: str qty: int # DB-specific fields: id: Optional[int] = Field(default=None, primary_key=True) batch_id: Optional[int] = Field(default=None, foreign_key="batch.id") batch: Optional["Batch"] = Relationship(back_populates="allocations") class Batch(SQLModel, table=True): reference: str sku: str purchased_quantity: int eta: Optional[date] allocations: List["OrderLine"] = Relationship(back_populates="batch") # DB-specific fields: id: Optional[int] = Field(default=None, primary_key=True) product_id: Optional[int] = Field(default=None, foreign_key="product.id") product: Optional["Product"] = Relationship(back_populates="batches") def 
__eq__(self, other): if not isinstance(other, Batch): return False return other.reference == self.reference def __hash__(self): return hash(self.reference) def __gt__(self, other): if self.eta is None: return False if other.eta is None: return True return self.eta > other.eta def allocate(self, order_line: OrderLine) -> None: if not self.can_allocate(order_line): return if order_line in self.allocations: return self.allocations.append(order_line) def deallocate(self, order_line: OrderLine) -> None: if order_line not in self.allocations: return self.allocations.remove(order_line) def deallocate_one(self): return self.allocations.pop() @property def allocated_quantity(self) -> int: return sum(line.qty for line in self.allocations) @property def available_quantity(self) -> int: return self.purchased_quantity - self.allocated_quantity def can_allocate(self, order_line: OrderLine) -> bool: return self.sku == order_line.sku and self.available_quantity >= order_line.qty class Product(SQLModel, table=True): sku: str batches: List["Batch"] = Relationship(back_populates="product") # DB-specific fields: id: Optional[int] = Field(default=None, primary_key=True) version_number: int = 0 # DB excluded fields: _messages: ModelPrivateAttr = PrivateAttr(default=[]) def __hash__(self): return hash(self.sku) @property def messages(self) -> List[Message]: return self._messages.default def allocate(self, order_line: OrderLine) -> Optional[str]: try: batch = next(b for b in sorted(cast(Iterable, self.batches)) if b.can_allocate(order_line)) except StopIteration: self.messages.append(events.OutOfStock(sku=order_line.sku)) return None batch.allocate(order_line) self.version_number += 1 self.messages.append(events.Allocated( order_id=order_line.order_id, sku=order_line.sku, qty=order_line.qty, batch_ref=batch.reference )) return batch.reference def change_batch_quantity(self, ref: str, qty: int): batch = next(b for b in self.batches if b.reference == ref) batch.purchased_quantity = qty while batch.available_quantity < 0: line = batch.deallocate_one() self.messages.append(commands.Allocate(order_id=line.order_id, sku=line.sku, qty=line.qty)) ================================================ FILE: books/python-architecture-patterns/src/redis_consumer.py ================================================ import json from typing import Dict from redis.client import Redis from src import config from src.bootstrap import bootstrap from src.domain import ( commands, events, ) from src.service_layer.message_bus import MessageBus r = Redis(**config.get_redis_host_and_port()) def main(): bus = bootstrap() pubsub = r.pubsub(ignore_subscribe_messages=True) pubsub.subscribe("change_batch_quantity") for m in pubsub.listen(): _handle_change_batch_quantity(m, bus) def _handle_change_batch_quantity(message: Dict, bus: MessageBus): event = events.BatchQuantityChanged(**json.loads(message["data"])) cmd = commands.ChangeBatchQuantity(ref=event.batch_ref, qty=event.qty) bus.handle(message=cmd) if __name__ == "__main__": main() ================================================ FILE: books/python-architecture-patterns/src/service_layer/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/src/service_layer/handlers.py ================================================ from src.adapters import redis_publisher from src.adapters.notifications import AbstractNotifications from src.domain import ( commands, events, ) from src.domain.model import ( 
Batch, OrderLine, Product, ) from src.service_layer.unit_of_work import ( AbstractUnitOfWork, UnitOfWork, ) class InvalidSku(Exception): pass def allocate(command: commands.Allocate, uow: AbstractUnitOfWork) -> str: order_line = OrderLine(order_id=command.order_id, sku=command.sku, qty=command.qty) with uow: product = uow.products.get(sku=command.sku) if not product: raise InvalidSku(f"Invalid SKU: {command.sku}") batch_ref = product.allocate(order_line) uow.commit() return batch_ref def add_batch(command: commands.CreateBatch, uow: AbstractUnitOfWork): with uow: product = uow.products.get(command.sku) if not product: product = Product(sku=command.sku, batches=[]) uow.products.add(product) product.batches.append(Batch(reference=command.ref, sku=command.sku, purchased_quantity=command.qty, eta=command.eta)) uow.commit() def change_batch_quantity(command: commands.ChangeBatchQuantity, uow: AbstractUnitOfWork): with uow: product = uow.products.get_by_batch_ref(command.ref) product.change_batch_quantity(ref=command.ref, qty=command.qty) uow.commit() def send_out_of_stock_notification(event: events.OutOfStock, notifications: AbstractNotifications): notifications.send("stock@made.com", f"Out of stock for {event.sku}") def publish_allocated_event(event: events.Allocated, uow: AbstractUnitOfWork): redis_publisher.publish("line_allocated", event) def add_allocation_to_read_model(event: events.Allocated, uow: UnitOfWork): with uow: uow.session.execute( """ INSERT INTO allocationsview (order_id, sku, batch_ref) VALUES (:order_id, :sku, :batch_ref) """, dict(order_id=event.order_id, sku=event.sku, batch_ref=event.batch_ref), ) uow.commit() def remove_allocation_from_read_model(event: events.Deallocated, uow: UnitOfWork): with uow: uow.session.execute( """ DELETE FROM allocationsview WHERE order_id = :order_id AND sku = :sku """, dict(order_id=event.order_id, sku=event.sku), ) uow.commit() def reallocate(event: events.Deallocated, uow: AbstractUnitOfWork, ): with uow: product = uow.products.get(sku=event.sku) product.messages.append(commands.Allocate(**event.dict())) uow.commit() ================================================ FILE: books/python-architecture-patterns/src/service_layer/message_bus.py ================================================ import logging from typing import ( Callable, Dict, List, Type, Union, ) from src.domain import ( commands, events, ) from src.service_layer import handlers from src.service_layer.unit_of_work import AbstractUnitOfWork logger = logging.getLogger(__name__) Message = Union[commands.Command, events.Event] EVENT_HANDLERS: Dict[Type[events.Event], List[Callable]] = { events.OutOfStock: [handlers.send_out_of_stock_notification], events.Allocated: [handlers.publish_allocated_event, handlers.add_allocation_to_read_model], events.Deallocated: [handlers.remove_allocation_from_read_model, handlers.reallocate] } COMMAND_HANDLERS: Dict[Type[commands.Command], Callable] = { commands.CreateBatch: handlers.add_batch, commands.ChangeBatchQuantity: handlers.change_batch_quantity, commands.Allocate: handlers.allocate, } class MessageBus: def __init__(self, uow: AbstractUnitOfWork, event_handlers: Dict[Type[events.Event], List[Callable]], command_handlers: Dict[Type[commands.Command], Callable]): self.uow = uow self.event_handlers = event_handlers self.command_handlers = command_handlers self.queue: List[Message] = [] def handle(self, message: Message): self.queue = [message] while self.queue: message = self.queue.pop(0) if isinstance(message, events.Event): 
self._handle_event(message) elif isinstance(message, commands.Command): self._handle_command(message) else: raise Exception(f"{message} was not an Event or Command") def _handle_event(self, event: events.Event): for handler in self.event_handlers[type(event)]: try: logger.debug(f"Handling event {event} with handler {handler}") handler(event) self.queue.extend(self.uow.collect_new_messages()) except Exception as e: logger.exception(f"Exception handling event {event}: {e}") continue def _handle_command(self, command: commands.Command): try: handler = self.command_handlers[type(command)] handler(command) self.queue.extend(self.uow.collect_new_messages()) except Exception: logger.exception("Exception handling command %s", command) raise ================================================ FILE: books/python-architecture-patterns/src/service_layer/unit_of_work.py ================================================ from __future__ import annotations from abc import ( ABC, abstractmethod, ) from typing import Optional from sqlmodel import ( Session, create_engine, ) from src.adapters.repository import ( Repository, TrackingRepository, ) from src.config import get_postgres_uri class AbstractUnitOfWork(ABC): products: TrackingRepository def __enter__(self) -> AbstractUnitOfWork: return self def __exit__(self, *args): self.rollback() def commit(self): self._commit() def collect_new_messages(self): for product in self.products.seen: while product.messages: yield product.messages.pop(0) @abstractmethod def rollback(self): raise NotImplementedError @abstractmethod def _commit(self): raise NotImplementedError def default_session(): return Session(create_engine(get_postgres_uri(), isolation_level="REPEATABLE READ")) class UnitOfWork(AbstractUnitOfWork): def __init__(self, session: Optional[Session] = None): # 'default_session()' can not be in the '__init__' because it would be evaluated only once: self.session = session if session else default_session() def __enter__(self): self.products = TrackingRepository(repo=Repository(self.session)) return super().__enter__() def __exit__(self, *args): super().__exit__(*args) self.session.close() def rollback(self): self.session.rollback() def _commit(self): self.session.commit() ================================================ FILE: books/python-architecture-patterns/src/views.py ================================================ from typing import ( Dict, List, ) from src.service_layer.unit_of_work import UnitOfWork def allocations(order_id: str, uow: UnitOfWork) -> List[Dict]: with uow: results = uow.session.execute( "SELECT sku, batch_ref FROM allocationsview WHERE order_id = :order_id", dict(order_id=order_id), ) return [dict(r) for r in results] ================================================ FILE: books/python-architecture-patterns/tests/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/tests/conftest.py ================================================ import pytest import redis from sqlmodel import ( Session, create_engine, ) from starlette.testclient import TestClient from tenacity import ( retry, stop_after_delay, ) from src import config from src.adapters.orm import ( clean_db_and_tables, create_db_and_tables, ) from src.app import api @pytest.fixture def in_memory_db(): engine = create_engine("sqlite:///:memory:") clean_db_and_tables(engine) create_db_and_tables(engine) return engine @pytest.fixture def session(in_memory_db): create_db_and_tables(in_memory_db) 
yield Session(in_memory_db) clean_db_and_tables(in_memory_db) @retry(stop=stop_after_delay(10)) def wait_for_postgres_to_come_up(engine): engine.connect() @retry(stop=stop_after_delay(10)) def wait_for_redis_to_come_up(): r = redis.Redis(**config.get_redis_host_and_port()) return r.ping() @pytest.fixture(scope="session") def postgres_db(): engine = create_engine(config.get_postgres_uri()) wait_for_postgres_to_come_up(engine) clean_db_and_tables(engine) create_db_and_tables(engine) return engine @pytest.fixture def postgres_session(postgres_db): create_db_and_tables(postgres_db) yield Session(postgres_db) clean_db_and_tables(postgres_db) @pytest.fixture def client(): return TestClient(api) ================================================ FILE: books/python-architecture-patterns/tests/e2e/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/tests/e2e/api_client.py ================================================ import json from src.domain.model import ( Batch, OrderLine, ) def post_to_allocate(client, order_id, sku, qty): return client.post("/allocate", json=json.loads(OrderLine(order_id=order_id, sku=sku, qty=qty).json())) def get_allocation(client, order_id): return client.post(f"/allocate/{order_id}") def post_to_add_batch(client, ref, sku, qty, eta): return client.post("/add_batch", json=json.loads(Batch(reference=ref, sku=sku, purchased_quantity=qty, eta=eta).json())) ================================================ FILE: books/python-architecture-patterns/tests/e2e/redis_client.py ================================================ import json import redis from src import config r = redis.Redis(**config.get_redis_host_and_port()) def subscribe_to(channel): pubsub = r.pubsub() pubsub.subscribe(channel) confirmation = pubsub.get_message(timeout=3) assert confirmation["type"] == "subscribe" return pubsub def publish_message(channel, message): r.publish(channel, json.dumps(message)) ================================================ FILE: books/python-architecture-patterns/tests/e2e/test_app.py ================================================ from datetime import date from uuid import uuid4 from tests.e2e.api_client import ( get_allocation, post_to_add_batch, post_to_allocate, ) def random_suffix(): return uuid4().hex[:6] def random_sku(name=''): return f"sku-{name}-{random_suffix()}" def random_batch_ref(name=''): return f"batch-{name}-{random_suffix()}" def random_order_id(name=''): return f"order-{name}-{random_suffix()}" def test_happy_path_returns_200_and_allocated_batch(client): sku, other_sku = random_sku(), random_sku("other") order_id = random_order_id() early_batch, later_batch, other_batch = random_batch_ref('1'), random_batch_ref('2'), random_batch_ref('3') post_to_add_batch(client, later_batch, sku, 100, date(2011, 1, 2)) post_to_add_batch(client, early_batch, sku, 100, date(2011, 1, 1)) post_to_add_batch(client, other_batch, other_sku, 100, None) response = post_to_allocate(client=client, order_id=order_id, sku=sku, qty=3) assert response.status_code == 200, response.status_code response = get_allocation(client=client, order_id=order_id) assert response.status_code == 200 assert response.json() == [{"sku": sku, "batch_ref": early_batch}] def test_unhappy_path_returns_400_and_error_message(client): unknown_order_id, unknown_sku = random_order_id(), random_sku() response = post_to_allocate(client=client, order_id=random_order_id(), sku=unknown_sku, qty=20) assert response.status_code 
== 400 assert response.json()["message"] == f"Invalid SKU: {unknown_sku}" response = get_allocation(client=client, order_id=unknown_order_id) assert response.status_code == 400 ================================================ FILE: books/python-architecture-patterns/tests/e2e/test_external_events.py ================================================ import json from datetime import date import pytest from tenacity import ( Retrying, stop_after_delay, ) from tests.e2e import redis_client from tests.e2e.api_client import ( post_to_add_batch, post_to_allocate, ) from tests.e2e.test_app import ( random_batch_ref, random_order_id, random_sku, ) def test_change_batch_quantity_leading_to_allocation(client): order_id, sku = random_order_id(), random_sku() earlier_batch, later_batch = random_batch_ref("old"), random_batch_ref("new") post_to_add_batch(client=client, ref=earlier_batch, sku=sku, qty=10, eta=date(2021, 1, 1)) post_to_add_batch(client=client, ref=later_batch, sku=sku, qty=10, eta=date(2021, 1, 2)) response = post_to_allocate(client=client, order_id=order_id, sku=sku, qty=10) assert response.status_code == 200 subscription = redis_client.subscribe_to("line_allocated") redis_client.publish_message("change_batch_quantity", {"batch_ref": earlier_batch, "qty": 5}) # it may take some for message to arrive: for attempt in Retrying(stop=stop_after_delay(3), reraise=True): with attempt: message = subscription.get_message(timeout=1) if not message: continue data = json.loads(message["data"]) assert data["order_id"] == order_id assert data["batch_ref"] == later_batch if not message: pytest.fail("Message not fetched") ================================================ FILE: books/python-architecture-patterns/tests/integration/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/tests/integration/test_uow.py ================================================ from threading import Thread from time import sleep from typing import List import pytest from sqlalchemy.orm import selectinload from sqlmodel import ( Session, select, ) from src.domain.model import ( Batch, OrderLine, Product, ) from src.service_layer.unit_of_work import UnitOfWork from tests.e2e.test_app import random_batch_ref sku = "GENERIC-SOFA" def insert_batch(session, batch_id): session.add(Product(sku=sku, batches=[Batch(reference=batch_id, sku=sku, purchased_quantity=100, eta=None)])) def get_allocated_batch_ref(session, order_id, sku): batches = session.exec(select(Batch).where(Batch.sku == sku).options(selectinload(Batch.allocations))).all() batch = next(batch for batch in batches for allocation in batch.allocations if allocation.order_id == order_id) return batch.reference def test_uow_retrieve_batch_and_allocate_to_it(session): insert_batch(session, "batch1") session.commit() with UnitOfWork(session) as uow: product = uow.products.get(sku=sku) line = OrderLine(order_id="o1", sku=sku, qty=10) product.allocate(order_line=line) uow.commit() assert get_allocated_batch_ref(session, "o1", "GENERIC-SOFA") == "batch1" def test_rolls_back_uncommitted_work_by_default(in_memory_db): old_session, new_session = Session(in_memory_db), Session(in_memory_db) with UnitOfWork(): insert_batch(old_session, "batch1") assert list(new_session.exec(select(Batch)).all()) == [] def test_rolls_back_on_error(in_memory_db): old_session, new_session = Session(in_memory_db), Session(in_memory_db) class MyException(Exception): pass with pytest.raises(MyException): with 
UnitOfWork(old_session): insert_batch(old_session, "batch1") raise MyException() assert list(new_session.exec(select(Batch)).all()) == [] def try_to_allocate(order_id: str, exceptions: List[Exception]): line = OrderLine(order_id=order_id, sku=sku, qty=10) try: with UnitOfWork() as uow: product = uow.products.get(sku) product.allocate(line) sleep(0.2) uow.commit() except Exception as e: exceptions.append(e) def test_concurrent_updates_to_version_number_are_not_allowed(postgres_db): session = Session(postgres_db) insert_batch(session, random_batch_ref()) session.commit() exceptions = [] t1, t2 = Thread(target=try_to_allocate, args=("order_id_1", exceptions)), Thread(target=try_to_allocate, args=("order_id_2", exceptions)) t1.start(), t2.start(), t1.join(), t2.join() product = session.exec(select(Product).where(Product.sku == sku)).one() assert product.version_number == 1 assert "could not serialize access due to concurrent update" in str(exceptions[0]) ================================================ FILE: books/python-architecture-patterns/tests/integration/test_views.py ================================================ from datetime import date from unittest.mock import Mock import pytest from sqlmodel import Session from src import views from src.adapters.orm import clean_db_and_tables from src.bootstrap import bootstrap from src.domain import commands from src.service_layer.unit_of_work import UnitOfWork today = date.today() @pytest.fixture def sqlite_bus(in_memory_db): bus = bootstrap( start_orm=True, uow=UnitOfWork(Session(in_memory_db)), notifications=Mock(), publish=lambda *args: None, ) yield bus clean_db_and_tables(in_memory_db) def test_allocations_view(sqlite_bus): sqlite_bus.handle(commands.CreateBatch("sku1batch", "sku1", 50, None)) sqlite_bus.handle(commands.CreateBatch("sku2batch", "sku2", 50, today)) sqlite_bus.handle(commands.Allocate("order1", "sku1", 20)) sqlite_bus.handle(commands.Allocate("order1", "sku2", 20)) sqlite_bus.handle(commands.CreateBatch("sku1batch-later", "sku1", 50, today)) sqlite_bus.handle(commands.Allocate("other_order", "sku1", 30)) sqlite_bus.handle(commands.Allocate("other_order", "sku2", 10)) assert views.allocations("order1", sqlite_bus.uow) == [ {"sku": "sku1", "batch_ref": "sku1batch"}, {"sku": "sku2", "batch_ref": "sku2batch"}, ] def test_deallocation(sqlite_bus): sqlite_bus.handle(commands.CreateBatch("b1", "sku1", 50, None)) sqlite_bus.handle(commands.CreateBatch("b2", "sku1", 50, today)) sqlite_bus.handle(commands.Allocate("o1", "sku1", 40)) sqlite_bus.handle(commands.ChangeBatchQuantity("b1", 10)) assert views.allocations("o1", sqlite_bus.uow) == [ {"batch_ref": "b1", "sku": "sku1"}, {"batch_ref": "b2", "sku": "sku1"} ] ================================================ FILE: books/python-architecture-patterns/tests/unit/__init__.py ================================================ ================================================ FILE: books/python-architecture-patterns/tests/unit/test_batches.py ================================================ from datetime import date from src.domain.model import ( Batch, OrderLine, ) def batch_and_line(sku, batch_quantity, line_quantity): return Batch(reference="batch-001", sku=sku, purchased_quantity=batch_quantity, eta=date.today()), OrderLine(order_id="order-123", sku=sku, qty=line_quantity) def test_allocating_to_batch_reduces_available_quantity(): batch, line = batch_and_line("SMALL-TABLE", 20, 2) batch.allocate(line) assert batch.available_quantity == 18 def 
def test_can_allocate_if_available_greater_than_required():
    large_batch, small_line = batch_and_line("ELEGANT-LAMP", 20, 2)
    assert large_batch.can_allocate(small_line)


def test_cannot_allocate_if_available_smaller_than_required():
    small_batch, large_line = batch_and_line("ELEGANT-LAMP", 2, 20)
    assert not small_batch.can_allocate(large_line)


def test_can_allocate_if_available_equal_to_required():
    small_batch, large_line = batch_and_line("ELEGANT-LAMP", 2, 2)
    assert small_batch.can_allocate(large_line)


def test_cannot_allocate_if_skus_dont_match():
    batch = Batch(reference="batch-001", sku="UNCOMFORTABLE-CHAIN", purchased_quantity=100, eta=None)
    different_sku_line = OrderLine(order_id="order-123", sku="EXPENSIVE-TOASTER", qty=10)
    assert not batch.can_allocate(different_sku_line)


def test_can_only_deallocate_allocated_lines():
    batch, unallocated_line = batch_and_line("DECORATIVE-TRINKET", 20, 2)
    batch.deallocate(unallocated_line)
    assert batch.available_quantity == 20


def test_allocation_is_idempotent():
    batch, line = batch_and_line("ANGULAR-DESK", 20, 2)
    batch.allocate(line)
    batch.allocate(line)
    assert batch.available_quantity == 18


================================================
FILE: books/python-architecture-patterns/tests/unit/test_handlers.py
================================================
from __future__ import annotations

from collections import defaultdict
from datetime import date
from typing import (
    Dict,
    List,
    Optional,
)

import pytest

from src.adapters.notifications import AbstractNotifications
from src.adapters.repository import (
    AbstractRepository,
    TrackingRepository,
)
from src.bootstrap import bootstrap
from src.domain import commands
from src.domain.model import Product
from src.service_layer.handlers import InvalidSku
from src.service_layer.unit_of_work import AbstractUnitOfWork


class FakeRepository(AbstractRepository):
    def __init__(self, products):
        super().__init__()
        self._products = set(products)

    def add(self, product: Product):
        self._products.add(product)

    def get(self, sku: str) -> Optional[Product]:
        return next((product for product in self._products if product.sku == sku), None)

    def get_by_batch_ref(self, ref: str) -> Optional[Product]:
        return next((product for product in self._products for batch in product.batches if batch.reference == ref), None)


class FakeUnitOfWork(AbstractUnitOfWork):
    def __init__(self):
        self.products = TrackingRepository(repo=FakeRepository([]))
        self.committed = False

    def rollback(self):
        pass

    def _commit(self):
        self.committed = True


class FakeNotifications(AbstractNotifications):
    def __init__(self):
        self.sent: Dict[str, List[str]] = defaultdict(list)

    def send(self, destination, message):
        self.sent[destination].append(message)


def bootstrap_test_app():
    return bootstrap(
        start_orm=False,
        uow=FakeUnitOfWork(),
        notifications=FakeNotifications(),
        publish=lambda *args: None,
    )


class TestAddBatch:
    def test_for_new_product(self):
        bus = bootstrap_test_app()
        bus.handle(commands.CreateBatch(ref="b1", sku="CRUNCHY-ARMCHAIN", qty=100))
        assert bus.uow.products.get("CRUNCHY-ARMCHAIN") is not None
        assert bus.uow.committed

    def test_for_existing_product(self):
        bus = bootstrap_test_app()
        bus.handle(commands.CreateBatch(ref="b1", sku="GARISH-RUG", qty=100))
        bus.handle(commands.CreateBatch(ref="b2", sku="GARISH-RUG", qty=99))
        assert "b2" in [b.reference for b in bus.uow.products.get("GARISH-RUG").batches]


class TestAllocate:
    def test_errors_for_invalid_sku(self):
        bus = bootstrap_test_app()
        bus.handle(commands.CreateBatch(ref="b1", sku="AREALSKU", qty=100))
        with pytest.raises(InvalidSku, match="Invalid SKU: NONEXISTENTSKU"):
            bus.handle(commands.Allocate(order_id="o1", sku="NONEXISTENTSKU", qty=10))

    def test_commits(self):
        bus = bootstrap_test_app()
        bus.handle(commands.CreateBatch(ref="b1", sku="OMINOUS-MIRROR", qty=100))
        bus.handle(commands.Allocate(order_id="o1", sku="OMINOUS-MIRROR", qty=10))
        assert bus.uow.committed

    def test_sends_email_on_out_of_stock_error(self):
        fake_notifications = FakeNotifications()
        bus = bootstrap(
            start_orm=False,
            uow=FakeUnitOfWork(),
            notifications=fake_notifications,
            publish=lambda *args: None,
        )
        bus.handle(commands.CreateBatch(ref="b1", sku="POPULAR-CURTAINS", qty=9))
        bus.handle(commands.Allocate(order_id="o1", sku="POPULAR-CURTAINS", qty=10))
        assert fake_notifications.sent["stock@made.com"] == ["Out of stock for POPULAR-CURTAINS"]


class TestChangeBatchQuantity:
    def test_changes_available_quantity(self):
        bus = bootstrap_test_app()
        bus.handle(commands.CreateBatch(ref="batch1", sku="ADORABLE-SETTEE", qty=100))
        [batch] = bus.uow.products.get("ADORABLE-SETTEE").batches
        assert batch.available_quantity == 100

        bus.handle(commands.ChangeBatchQuantity(ref="batch1", qty=50))
        assert batch.available_quantity == 50

    def test_reallocates_if_necessary(self):
        bus = bootstrap_test_app()
        event_history = [
            commands.CreateBatch(ref="batch1", sku="INDIFFERENT-TABLE", qty=50),
            commands.CreateBatch(ref="batch2", sku="INDIFFERENT-TABLE", qty=50, eta=date.today()),
            commands.Allocate(order_id="order1", sku="INDIFFERENT-TABLE", qty=20),
            commands.Allocate(order_id="order2", sku="INDIFFERENT-TABLE", qty=20),
        ]
        for e in event_history:
            bus.handle(e)
        [batch_1, batch_2] = bus.uow.products.get("INDIFFERENT-TABLE").batches
        assert batch_1.available_quantity == 10
        assert batch_2.available_quantity == 50

        bus.handle(commands.ChangeBatchQuantity(ref="batch1", qty=25))
        assert batch_1.available_quantity == 5
        assert batch_2.available_quantity == 30


================================================
FILE: books/python-architecture-patterns/tests/unit/test_product.py
================================================
from datetime import date

from src.domain import events
from src.domain.model import (
    Batch,
    OrderLine,
    Product,
)


def test_prefers_current_stock_batches_to_shipments():
    in_stock_batch = Batch(reference="in-stock-batch", sku="RETRO-CLOCK", purchased_quantity=100, eta=None)
    shipment_batch = Batch(reference="shipment-batch", sku="RETRO-CLOCK", purchased_quantity=100, eta=date(2022, 1, 7))
    line = OrderLine(order_id="oref", sku="RETRO-CLOCK", qty=10)
    product = Product(sku="RETRO-CLOCK", batches=[in_stock_batch, shipment_batch])

    product.allocate(line)

    assert in_stock_batch.available_quantity == 90
    assert shipment_batch.available_quantity == 100


def test_prefers_earlier_batches():
    earliest = Batch(reference="speedy-batch", sku="MINIMALIST-SPOON", purchased_quantity=100, eta=date(2022, 1, 7))
    medium = Batch(reference="normal-batch", sku="MINIMALIST-SPOON", purchased_quantity=100, eta=date(2022, 1, 8))
    latest = Batch(reference="slow-batch", sku="MINIMALIST-SPOON", purchased_quantity=100, eta=date(2022, 1, 9))
    line = OrderLine(order_id="oref", sku="MINIMALIST-SPOON", qty=10)
    product = Product(sku="MINIMALIST-SPOON", batches=[medium, earliest, latest])

    product.allocate(line)

    assert earliest.available_quantity == 90
    assert medium.available_quantity == 100
    assert latest.available_quantity == 100


def test_returns_allocated_batch_ref():
    in_stock_batch = Batch(reference="in-stock-batch-ref", sku="HIGHBROW-POSTER", purchased_quantity=100, eta=None)
Batch(reference="shipment-batch-ref", sku="HIGHBROW-POSTER", purchased_quantity=100, eta=date(2022, 1, 7)) line = OrderLine(order_id="oref", sku="HIGHBROW-POSTER", qty=10) product = Product(sku="HIGHBROW-POSTER", batches=[in_stock_batch, shipment_batch]) allocation = product.allocate(line) assert allocation == in_stock_batch.reference def test_records_out_of_stock_event_if_cannot_allocate(): batch = Batch(reference="batch", sku="SMALL-FORM", purchased_quantity=10, eta=date(2022, 1, 7)) product = Product(sku="SMALL-FORK", batches=[batch]) product.allocate(OrderLine(order_id="oref", sku="SMALL-FORM", qty=10)) allocation = product.allocate(OrderLine(order_id="oref", sku="SMALL-FORM", qty=1)) assert product.messages[-1] == events.OutOfStock(sku="SMALL-FORM") assert allocation is None ================================================ FILE: books/refactoring.md ================================================ [go back](https://github.com/pkardas/learning) # Refactoring: Improving the Design of Existing Code Book by Martin Fowler (Second Edition) - [Chapter 1: Refactoring: A First Example](#chapter-1-refactoring-a-first-example) - [Chapter 2: Principles in Refactoring](#chapter-2-principles-in-refactoring) - [Chapter 3: Bad Smells in Code](#chapter-3-bad-smells-in-code) - [Chapter 4: Building Tests](#chapter-4-building-tests) - [Chapter 5: Introducing the Catalog](#chapter-5-introducing-the-catalog) - [Chapter 6: A First Set of Refactorings](#chapter-6-a-first-set-of-refactorings) - [Chapter 7: Encapsulation](#chapter-7-encapsulation) - [Chapter 8: Moving Features](#chapter-8-moving-features) - [Chapter 9: Organising Data](#chapter-9-organising-data) - [Chapter 10: Simplifying Conditional Logic](#chapter-10-simplifying-conditional-logic) - [Chapter 11: Refactoring APIs](#chapter-11-refactoring-apis) - [Chapter 12: Dealing with Inheritance](#chapter-12-dealing-with-inheritance) ## Chapter 1: Refactoring: A First Example A poorly designed system is hard to change - because it is hard to figure out what to change and hoe these changes will interact with existing code. > When you have to add a feature to a program but the code is not structured in a convenient way, first refactor the > program to make it easy to add the feature, then add the feature. Before making any changes, start with self-checking tests (assertions checked by testing framework). Tests can be considered as bug detectors, they should catch any change that introduces bugs. Refactoring changes the programs in small steps, so if you make a mistake, it is easy to find where the bug is. Author suggests committing after each successful refactoring, so it is easier get back to a working state, then he squashes changes into more significant commits before pushing changes to the remote repository. When refactoring a long functions, mentally try to identify points that separate different parts of the overall behaviour (decomposition). Extracting a function is a common refactoring technique. > Any fool can write code that a computer can understand. Good programmers write code that humans can understand. Other techniques discussed also later: Replace Temp with Query, Inline Variable, Change Function Declaration, Split Loop, Slide Statements. Think of the best name at the moment and rename it later. Breaking large functions into smaller, only adds value if the names are good. > Programmers are poor judges of how code actually performs. Many of our intuitions are broken by clever compilers, > modern caching techniques, .... 
> Any fool can write code that a computer can understand. Good programmers write code that humans can understand.

Other techniques, discussed also later: Replace Temp with Query, Inline Variable, Change Function Declaration, Split Loop, Slide Statements.

Think of the best name you can at the moment and rename it later. Breaking large functions into smaller ones only adds value if the names are good.

> Programmers are poor judges of how code actually performs. Many of our intuitions are broken by clever compilers, modern caching techniques, ... The performance of software usually depends on just a few parts of the code, and changes anywhere else don't make an appreciable difference.

Anyhow, if refactoring introduces performance slow-downs, finish refactoring first and then do performance tuning.

Mutable data quickly becomes something rotten.

> Always leave the code base healthier than when you found it. It will never be perfect, but it should be better.

> A true test of good code is how easy it is to change it.

Code should be obvious. When doing refactoring, take small steps - each step should leave the code in a working state that compiles and passes its tests.

## Chapter 2: Principles in Refactoring

Refactoring (noun) - a change made to the internal structure of software to make it easier to understand and cheaper to modify without changing its observable behaviour.

Refactoring (verb) - to restructure software by applying a series of refactorings without changing its observable behaviour.

When doing refactoring, code should not spend much time in a broken state, which means you can stop at any moment even if you haven't finished. If someone says their code was broken for a couple of days while they were refactoring, you can be pretty sure they were not refactoring.

Two Hats - when developing new functionality, do not change existing code; when refactoring, do not add new functionality. Swap hats: refactor, add functionality, refactor, ...

Why should we refactor?

- software design improvement - changes are made to achieve short-term goals, and because of that code loses its structure; regular refactoring helps keep the code in shape. An important aspect of refactoring is eliminating duplicated code.
- makes software easier to understand - think about future developers, decrease the time needed to make a change. You don't have to remember every aspect of the code - make it easy to understand and decrease the load on your brain.
- helps in finding bugs - clarifies the structure and certain assumptions.
- helps programming faster - adding new features might be difficult in a system full of patches and patches for patches; a clear structure allows adding new capabilities faster. Good design allows you to quickly find the place where a change needs to be made. Also, if code is clear, it is less likely that you will introduce a bug. The code base should be a platform for building new features for its domain.

> The Rule of Three - The first time you do something, you just do it. The second time you do something similar, you wince at the duplication, but you do the duplicate thing anyway. The third time you do something similar, you refactor.

When should we refactor?

- preparatory refactoring - building a foundation for a new feature.
  - > It is like you want to go 100 km east but instead of traipsing through the woods, you drive 20 km north to the highway, and then you are going 3x the speed you could have if you just went straight there.
- comprehension refactoring - making code easier to understand. Move the understanding of a subject from your head into the code itself.
- litter-pickup refactoring - make small changes around the place you are currently viewing - the Boy Scout Rule.
- planned and opportunistic refactoring - refactoring should happen while doing other things; planned refactorings are usually required in teams that neglected refactoring.
- long-term refactoring - a refactoring may take weeks, because of a new library or pulling some section of code out into a component that can be shared between teams - even in such cases refactoring should be performed in small steps.
- refactoring in a code review - code reviews help spread knowledge through a development team. Code may look clear to me but not to my team. Code reviews give the opportunity for more people to suggest useful ideas.

Sometimes it is easier to rewrite than refactor. The decision to refactor or rewrite requires good judgement and experience.

However, there are a couple of problems associated with refactoring:

- some people see refactoring as something that slows down development (which is not really true) - this should be explained; the economic benefits of refactoring should always be the driving factor: we refactor because it makes us faster at adding features and fixing bugs.
- merge conflicts may be painful, especially in a team of multiple full-time developers; the suggested approach is to use CI - Continuous Integration - each team member integrates with the mainline at least once per day.
- to perform refactoring correctly you need to have good tests - code needs to be self-testing; without self-testing code, refactoring carries a high risk of introducing bugs.
- refactoring legacy code is hard, but it is a fantastic tool to help understand a legacy system. Legacy code is often missing tests, and adding tests for legacy code is difficult because it wasn't designed with testing in mind.
- some time ago database refactoring was considered a problem area; currently we have migrations, which make database refactoring possible.

Refactoring changed how people think about architecture (previously: completed before any development, now: changed iteratively). YAGNI does not mean you need to neglect all architectural thinking.

In order to be fully agile, a team has to be capable and enthusiastic refactorers. The first foundation for refactoring is self-testing code, the second is CI. Good programmers know that they rarely write clean code the first time around.

IDEs use the syntax tree to analyse and refactor code (e.g. changing a variable name happens on the syntax tree level, not on the text level) - this makes IDEs more powerful than text editors.

## Chapter 3: Bad Smells in Code

When should you start refactoring? It is a matter of intuition. However, there are some indicators.

MYSTERIOUS NAME - code needs to be mundane and clear; a good name can save hours of puzzled incomprehension in the future.

DUPLICATED CODE - if you see the same code structure in more than one place, your program will be better if you find a way to unify them. Duplication means every time you read these copies you need to read them carefully and look for differences.

LONG FUNCTION - the programs that live best and longest are those with short functions. Whenever you feel you need to comment something - decompose. Even a single line is worth extracting if it needs an explanation. Conditionals and loops are also signs for extraction.

LONG PARAMETER LIST - long lists of parameters are confusing - pass an object, use a query on an existing object or combine functions into an object.

GLOBAL DATA - the problem with global data is that it can be modified from any place in the code base, and this leads to bugs. Global data: global variables, class variables, singletons. Global data is especially nasty when it is mutable.

MUTABLE DATA - (from functional programming) data should never change; updating a data structure should return a new copy of the structure, leaving the old data pristine (see the sketch below).

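A minimal Python illustration of that style (the `Money` class is invented for the example); `dataclasses.replace` builds a new object instead of mutating the old one:

```
from dataclasses import dataclass, replace


@dataclass(frozen=True)  # frozen=True makes instances immutable
class Money:
    amount: int
    currency: str


price = Money(amount=100, currency="PLN")
discounted = replace(price, amount=80)  # a new Money - `price` stays pristine

assert price.amount == 100
assert discounted.amount == 80
```
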
DIVERGENT CHANGE - making changes should be easy; if you need to, for example, edit 4 functions every time you add a new financial instrument, something is off.

SHOTGUN SURGERY - every time you make a change, you have to make a lot of little edits to a lot of different classes; when changes are all over the place, they are hard to find, and it is easy to miss an important one. In such a case all the fields should be put in a single module.

FEATURE ENVY - for example: a function in one module spends more time communicating with functions or data inside another module than it does within its own module - the function clearly wants to be with the data, so move the function to get it there. Put things together that change together.

DATA CLUMPS - some items enjoy hanging around together; the same three or four data items appear together in lots of places - you can group them together.

PRIMITIVE OBSESSION - many programmers are reluctant to create their own fundamental types which are useful for their domain.

REPEATED SWITCHES - basically the same problem as in DUPLICATED CODE.

LOOPS - loops are less relevant in programming today because of the presence of map and filter mechanisms.

LAZY ELEMENT - sometimes you may want to replace a function with inline code or collapse an object hierarchy.

SPECULATIVE GENERALITY - all the special cases to handle situations that are not going to happen soon (YAGNI).

TEMPORARY FIELD - a class with a field which is set only in certain circumstances - difficult to understand.

MESSAGE CHAINS - the client asks an object for another object, which the client then asks for yet another object - this might cause a train wreck; navigating such code is difficult.

MIDDLE MAN - internal details of the object should be hidden from the rest of the world.

INSIDER TRADING - modules should be separated to keep their whispering down; if 2 modules have common interests, create a third module for this communication.

LARGE CLASS - when a class has too many fields, it is a sign that it is doing too much; this means duplicated code, chaos and death.

ALTERNATIVE CLASSES WITH DIFFERENT INTERFACES - if you are allowing substitution, classes have to have the same interface.

DATA CLASS - classes with fields, setters and getters - nothing else. Such classes are often being manipulated in far too much detail by other classes. You can try to move that behaviour into the data class.

REFUSED BEQUEST - wrong hierarchy, subclasses don't want or need what they are given.

COMMENTS - when you feel the need to write a comment, first try to refactor the code so that any comment becomes superfluous.

## Chapter 4: Building Tests

Proper refactoring can not be done without proper tests. A suite of tests is a powerful bug detector that decapitates the time it takes to find bugs.

TDD allows concentrating on the interface rather than the implementation, which is a good thing.

Always make sure a test will fail when it should (try to break your code, to see if the test fails as well). Testing should be risk-driven - you don't need to test every getter.

When you get a bug report, start by writing a unit test that exposes the bug (a sketch below). The best measure for a good enough test suite is subjective: how confident are you that if someone introduces a defect into your code, some test will fail?

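For instance, a pytest-style sketch (the discount function and the bug are invented) - the test is written straight from the bug report and must fail until the defect is fixed:

```
def apply_discount(total: float, rate: float) -> float:
    # imagine the reported bug: the discount was applied twice somewhere in here
    return total * (1 - rate)


def test_discount_is_applied_exactly_once():
    # written before touching the implementation - it pins the bug down
    assert apply_discount(100.0, 0.1) == 90.0
```
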
## Chapter 5: Introducing the Catalog

The rest of the book is a catalog of refactorings. Each *Refactoring* has: name, sketch, motivation, mechanics and examples.

## Chapter 6: A First Set of Refactorings

EXTRACT FUNCTION - write small functions.

INLINE FUNCTION - inverse of *Extract Function*; sometimes the function body is as clear as the name. Helpful when you need to group functions - first you join them and then extract functions.

EXTRACT VARIABLE - inverse of *Inline Variable*; expressions can become very complex and hard to read - in such situations local variables may help break the expression down into something more manageable.

INLINE VARIABLE - inverse of *Extract Variable*; sometimes the name doesn't communicate more than the expression itself.

CHANGE FUNCTION DECLARATION - if you see a function with the wrong name, change it as soon as you understand what a better name would be, so next time you are looking at the code you don't have to figure out what is going on. Often a good way of improving a name is to write a comment describing the function's purpose - then turn that comment into a name (applies to variables as well). Adding / removing parameters can be done by introducing an intermediate wrapping function.

ENCAPSULATE VARIABLE - encapsulate access to the variable using functions; instead of accessing data directly, do this through a single access point - a function. Keeping data encapsulated is less important for immutable data.

RENAME VARIABLE - variables can do a lot to explain what the programmer is up to (if they are named well).

INTRODUCE PARAMETER OBJECT - often a group of data items travel together, appearing in function after function. Such a group is a data clump - it can easily be replaced with a data structure. Example:

```
def amountInvoiced(start: date, end: date)

def amountInvoiced(date_range: Range)
```

Grouping data into a structure is valuable because it makes the relationship between the data items explicit and reduces the size of parameter lists. Grouping helps to identify new structures.

COMBINE FUNCTIONS INTO CLASS - when a group of functions operate closely together on a common body of data, there is an opportunity to form a class.

> Uniform access principle - All services offered by a module should be available through a uniform notation, which does not betray whether they are implemented through storage or through computation. With this, the client of the class can't tell whether the *value* is a field or a derived value.

COMBINE FUNCTIONS INTO TRANSFORM - instead of aggregating functions into classes, you can build functions that enrich existing objects. A transformation is about producing essentially the same thing with some additional information.

SPLIT PHASE - whenever you encounter code that does two things, look for a way to split it into separate modules. If some processing has 2 stages, make the difference explicit by turning them into 2 separate modules.

## Chapter 7: Encapsulation

ENCAPSULATE RECORD - instead of using plain dictionaries, encapsulate them into an object. With an object, you can hide what is stored and provide methods for all the values. The user does not have to care which value is calculated and which is stored. **Dictionaries are useful** in many programming situations **but they are not explicit about their fields**. Refactor implicit structures into explicit ones.

ENCAPSULATE COLLECTION - a good idea is to ensure that the getter for the collection can not accidentally change it. One way to prevent modification of a collection is to use some form of read-only proxy to the collection. Such a proxy can allow all reads but block any write to the collection. The most popular approach is to provide a getting method for the collection, but make it return a copy of the underlying collection (see the sketch below). Replacing `customer.orders.size` with `customer.num_of_orders` is not recommended, because it adds a lot of extra code and cripples the easy composability of collection operations.

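A minimal sketch of the copy-returning getter (the `Customer` class is invented):

```
class Customer:
    def __init__(self):
        self._orders: list = []

    def add_order(self, order) -> None:
        self._orders.append(order)

    @property
    def orders(self) -> list:
        # hand out a copy - callers can iterate and compose freely,
        # but cannot mutate the customer's internal list
        return list(self._orders)


customer = Customer()
customer.add_order("order-1")
customer.orders.append("sneaky-order")  # mutates only the copy
assert customer.orders == ["order-1"]
```
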
If the team has the habit of not modifying collections outside the original module, it might be enough. It is worth being moderately paranoid about collections - rather copy them unnecessarily than debug errors due to unexpected modifications. For example, instead of sorting in place, return a new copy.

REPLACE PRIMITIVE WITH OBJECT - simple facts can be represented by simple data items such as numbers or strings; as development proceeds, those simple items aren't so simple anymore. This is one of the most important refactorings. Starting with simply wrapping the value in an object, you can extend the class with additional behaviours.

REPLACE TEMP WITH QUERY - using temporary variables allows referring to the value while explaining its meaning and avoiding repeating the code that calculates it. But while using a variable is handy, it can often be worthwhile to go a step further and use a function instead, mostly when the variable needs to be calculated multiple times across the class.

EXTRACT CLASS - split classes containing too much logic into separate classes. Good signs for doing so:

- a subset of the data and a subset of the methods seem to go together
- data that usually change together or are particularly dependent on each other

Useful test: ask the question: if you removed a piece of data or a method, what other fields and methods would become nonsense?

INLINE CLASS - inverse of *Extract Class*. Generally useful as an intermediate step when performing refactoring, e.g. you put all attributes in one class, just to split them later.

HIDE DELEGATE - Example: `person.department.manager` should be replaced with `person.manager` (an additional getter hiding the delegate, sketched below). Why? If the delegate changes its interface, the change would otherwise have to be propagated across all parts of the system.

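A minimal sketch (invented classes):

```
class Department:
    def __init__(self, manager: str):
        self.manager = manager


class Person:
    def __init__(self, department: Department):
        self._department = department

    @property
    def manager(self) -> str:
        # clients ask the person, not the person's department
        return self._department.manager


boss = Person(Department(manager="Alice")).manager  # instead of person.department.manager
```
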
REMOVE MIDDLE MAN - inverse of *Hide Delegate*. Sometimes the forwarding introduced by Hide Delegate becomes irritating, and it is easier to call the delegate directly (a violation of the Law of Demeter, for which the author suggests a better name: the Occasionally Useful Suggestion of Demeter).

SUBSTITUTE ALGORITHM - there are usually several ways to do the same thing, and the same goes for algorithms. When you learn more about the problem, you can realise there is an easier way to do it.

## Chapter 8: Moving Features

Another important part of refactoring is moving elements between contexts.

MOVE FUNCTION - one of the most straightforward reasons to move a function is when it references elements in other contexts more than the one it currently resides in. Deciding to move a function is rarely an easy decision. Examine the current and candidate contexts for that function.

MOVE FIELD - programming involves writing a lot of code that implements behaviour - but the strength of a program is really founded on its data structures. If I have a good set of data structures that match the problem, then my behaviour code is simple and straightforward. Moving fields usually happens in the context of a broader set of changes.

MOVE STATEMENTS INTO FUNCTION - removing duplication is one of the best rules of thumb of healthy code. Look to combine repeating code into a function. That way, any future modification to the repeating code can be done in one place and used by all the callers.

MOVE STATEMENTS TO CALLERS - inverse of *Move Statements into Function*. The motivation for this refactoring is that we rarely get the boundaries right. Sometimes common behaviour used in several places needs to vary in some of its calls - that is when you move the varying behaviour out of the function to its callers.

REPLACE INLINE CODE WITH FUNCTION CALL - functions allow packaging bits of behaviour. This is useful for understanding - a named function can explain the purpose of the code rather than its mechanics. It is also useful for deduplication.

SLIDE STATEMENTS - code is easier to understand when things that are related to each other appear together. If several lines of code access the same data structure, it is best for them to be together rather than intermingled with code accessing other data structures. You can also declare a variable just before you first use it.

SPLIT LOOP - you have often seen loops that are doing two different things at once, just because they can do that with one pass through the loop. But if you are doing two different things in the same loop, then whenever you need to modify the loop you have to understand both things. By splitting the loop, you ensure you only need to understand the behaviour you need to modify. Many programmers are uncomfortable with this refactoring, as it forces you to execute the loop twice. REMINDER: once you have your code clear, you can optimise it, and if the loop traversal is a bottleneck, it is easy to slam the loops back together. But the actual iteration through even a large list is rarely a bottleneck, and splitting the loops often enables other, more powerful optimisations.

REPLACE LOOP WITH PIPELINE - language environments provide better constructs than loops - the collection pipeline (`input.filter(...).map(...)`). Logic is much easier to follow if it is expressed as a pipeline. It can be read from top to bottom to see how objects flow through the pipeline.

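In Python, the `filter`/`map` pipeline is usually spelled as a comprehension or generator expression; a tiny sketch with invented data:

```
readings = [12.1, -1.0, 15.3, -2.5, 9.8]

# loop version: filtering and mapping interleaved with bookkeeping
total = 0.0
for r in readings:
    if r >= 0:
        total += r * 1.8 + 32

# pipeline version: reads top to bottom as filter -> map -> reduce
total = sum(r * 1.8 + 32 for r in readings if r >= 0)
```
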
REMOVE DEAD CODE - decent compilers will remove unused code, but unused code is still a significant burden when trying to understand how the software works. Once code is not used, it should be deleted. If you need it sometime in the future - you have a version control system, so you can always dig it out again. Commenting out dead code was once a common habit; it was useful before version control systems were widely used or when they were inconvenient.

## Chapter 9: Organising Data

Data structures play an important role in our programs, so it is no surprise that there is a clutch of refactorings that focus on them.

SPLIT VARIABLE - using a variable for two different things is very confusing for the reader. Any variable with more than one responsibility should be replaced with multiple variables, one for each responsibility. Exception: collecting variables (e.g. `i = i + 1`) - often used for calculating sums, string concatenation, writing to a stream or adding to a collection - don't split them.

RENAME FIELD - data structures are the key to understanding what is going on inside the system. It is essential to keep them clear. Rename fields in classes / records, so they are easy to understand.

REPLACE DERIVED VARIABLE WITH QUERY - one of the biggest sources of problems in software is mutable data. Data changes can often couple together parts of code in awkward ways, with changes in one part leading to knock-on effects that are hard to spot. Remove variables that can be easily calculated. A calculation often makes it clearer what the meaning of the data is, and it is protected from being corrupted when you fail to update the variable as the source data changes.

CHANGE REFERENCE TO VALUE - instead of updating values of the nested objects, create a new object with updated params. Value objects are generally easier to reason about, particularly because they are immutable. Immutable data structures are easier to work with.

CHANGE VALUE TO REFERENCE - inverse of *Change Reference to Value*. A data structure may have several records linked to the same logical data structure. The biggest difficulty in having physical copies of the same logical data occurs when you need to update the shared data: you have to find all the copies and update them all, and if you miss one, you will get a troubling inconsistency in the data. In this case, it is often worthwhile to change the copied data into a single reference.

## Chapter 10: Simplifying Conditional Logic

Much of the power of programs comes from their ability to implement conditional logic - but, sadly, much of the complexity of programs lies in these conditionals.

DECOMPOSE CONDITIONAL - the length of a function is in itself a factor that makes it hard to read, but conditions increase the difficulty. As with any large block of code, you can make your intention clearer by decomposing it and replacing each chunk of code with a function call named after the intention of that chunk.

CONSOLIDATE CONDITIONAL EXPRESSION - sometimes you run into a series of conditional checks where each check is different yet the resulting action is the same. When you see this, you can use `and` and `or` operators to consolidate them into a single conditional check with a single result. Consolidating is important because it makes it clear that you are making a single check that combines other checks, and because it often sets you up for *Extract Function*. Extracting a condition is one of the most useful things you can do to clarify code.

REPLACE NESTED CONDITIONAL WITH GUARD CLAUSES - a guard clause says: "This isn't the core of this function, and if it happens, do something and get out." In other words, if you know the result, return it immediately instead of assigning it to a `result` variable just to have one single return statement at the end of the function. *// A guard clause is simply a check that immediately exits the function, either with a return statement or an exception.*

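A before/after sketch (the payroll fields are invented):

```
def payment_amount(employee) -> float:
    # nested version: the reader has to track `result` through every branch
    if employee.is_separated:
        result = 0.0
    else:
        if employee.is_retired:
            result = employee.pension
        else:
            result = employee.salary
    return result


def payment_amount_with_guards(employee) -> float:
    if employee.is_separated:
        return 0.0  # not the core of the function: answer and get out
    if employee.is_retired:
        return employee.pension
    return employee.salary
```
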
REPLACE CONDITIONAL WITH POLYMORPHISM - it is possible to put logic in a superclass, which allows reasoning about it without having to worry about the variants. Each variant case can be put in a subclass. Complex conditional logic can be improved using polymorphism. This feature can be overused - basic conditional logic should use basic conditional statements.

INTRODUCE SPECIAL CASE - also known as *Introduce Null Object*. Many parts of the system have the same reaction to a particular value; you may want to bring that reaction into a single place. The Special Case pattern is a mechanism that captures all the common behaviour - this allows replacing most of the special-case checks with simple calls. A common value that needs special-case processing is null, which is why this pattern is often called the Null Object pattern.

INTRODUCE ASSERTION - often, sections of code work only if certain conditions are true. Such assumptions are often not stated explicitly and can only be deduced by looking through the algorithm. Sometimes these assumptions are stated with a comment. A better technique is to make the assumption explicit by writing an assertion. Failure of an assertion indicates a programmer error. Assertions should never be checked by other parts of the system, and they should be written so that the program functions equally correctly if they are all removed. Use assertions to check things that need to be true - use them when you think they should never fail.

## Chapter 11: Refactoring APIs

Modules and functions are the building blocks of our software. APIs are the joints that we use to plug them together. Making APIs easy to understand and use is difficult.

SEPARATE QUERY FROM MODIFIER - it is a good idea to clearly signal the difference between functions with side effects and those without. A good rule to follow is that any function that returns a value should not have *observable* (e.g. a cache does not count) side effects (command-query separation). Having a function that gives a value without observable side effects is very valuable, because you can call this function as often as you like.

PARAMETRISE FUNCTION - if you see two functions that carry out very similar logic with different literal values, you can remove the duplication by using a single function with parameters for the different values.

REMOVE FLAG ARGUMENT - a flag argument is a function argument that the caller uses to indicate which logic the called function should execute (via a boolean value, enum or string). Flags complicate the process of understanding what function calls are available and how to call them. Boolean values are the worst, since they don't convey their meaning to the reader - what does `true` mean? Remove flag arguments (see the sketch below).

There is only one case for flag arguments - when there is more than one flag argument, making a specialised function for every combination of values would greatly increase the complexity. But on the other hand, this is a signal of a function doing too much.

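A sketch of the refactoring (the delivery example is invented) - the boolean disappears into two intention-revealing functions:

```
from datetime import timedelta


def delivery_date(order, is_rush: bool):
    # caller writes delivery_date(order, True) - what does True mean?
    return order.placed_on + timedelta(days=1 if is_rush else 3)


# after: one function per behaviour, nothing to decode at the call site
def rush_delivery_date(order):
    return order.placed_on + timedelta(days=1)


def regular_delivery_date(order):
    return order.placed_on + timedelta(days=3)
```
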
PRESERVE WHOLE OBJECT - if you see code that derives a couple of values from a record and then passes these values into a function, replace those values with the whole record itself, letting the function body derive the values it needs. This change reduces the number of parameters and handles future changes better. Pulling several values from an object to do some logic on them alone is a smell - *Feature Envy* - and usually a signal that this logic should be moved into the object itself. If several bits of code only use the same subset of an object's features, that may indicate a good opportunity for *Extract Class*.

REPLACE PARAMETER WITH QUERY - inverse of *Replace Query with Parameter*. The parameter list of a function should summarise the points of variability of that function, indicating the primary ways in which that function may behave differently. If a call passes in a value that the function can easily determine for itself, that is a form of duplication. When the parameter is present, determining its value is the caller's responsibility - otherwise, that responsibility shifts to the function body. The usual habit should be to simplify life for callers, which implies moving responsibility to the function body.

REPLACE QUERY WITH PARAMETER - inverse of *Replace Parameter with Query*. By moving a query to the parameter list, you force the caller to figure out how to provide this value. This complicates life for callers of the function (preferably, make life easier for them).

REMOVE SETTING METHOD - providing a setting method indicates that a field may be changed. If you don't want that field to change once the object is created, do not provide a setting method (and make the field immutable). Removing the setter makes it clear that updates make no sense after construction.

REPLACE CONSTRUCTOR WITH FACTORY FUNCTION - constructors often come with awkward limitations that aren't there for regular functions: the constructor's name is fixed, and it often requires a special operator (`new`). A factory function suffers from no such limitations.

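A minimal sketch (the `Employee` type codes are invented) - the factory's name can say what the constructor cannot:

```
class Employee:
    def __init__(self, name: str, type_code: str):
        self.name = name
        self.type_code = type_code


# the constructor's name is fixed to the class; a factory is named after intent
def create_engineer(name: str) -> Employee:
    return Employee(name, type_code="E")


def create_manager(name: str) -> Employee:
    return Employee(name, type_code="M")
```
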
REPLACE FUNCTION WITH COMMAND - there are times when it is useful to encapsulate a function into its own object (a command object / command). Such an object is mostly built around a single method, whose request and execution is the purpose of the object. A command offers greater flexibility for the control and expression of a function than the plain function mechanism - commands can have operations such as `undo`. There are good reasons to use commands, but do not forget that this flexibility comes at a price paid in complexity.

REPLACE COMMAND WITH FUNCTION - inverse of *Replace Function with Command*. Command objects provide a powerful mechanism for handling complex computations, but most of the time you just want to invoke a function and have it do its thing. If the function isn't too complex, then a command object is more trouble than it's worth and should be turned into a regular function.

## Chapter 12: Dealing with Inheritance

Inheritance is a very useful and easy to misuse mechanism.

PULL UP METHOD - a form of removing duplication (duplication is bad because there is a risk that an alteration to one copy will not be made to the other). Pulling a method up means putting the method in the parent class.

PULL UP CONSTRUCTOR BODY - common constructor behaviour should reside in the superclass.

PUSH DOWN METHOD - inverse of *Pull Up Method*. If a method is only relevant to one subclass (or a small proportion of subclasses), removing it from the superclass and putting it only on the subclass makes that clearer. You can only do this refactoring if the caller knows it is working with a particular subclass - otherwise, use *Replace Conditional with Polymorphism* with some placebo behaviour on the superclass.

PUSH DOWN FIELD - if a field is only used by one subclass (or a small proportion of subclasses), move it to those subclasses.

REPLACE TYPE CODE WITH SUBCLASS - instead of using a *flag* in the object indicating the type of the class (e.g. `Employee(engineer)`), create specialised subclasses.

REMOVE SUBCLASS - inverse of *Replace Type Code with Subclass*. Subclasses are useful, but as a software system evolves, subclasses can lose their value. A subclass that does too little incurs a cost in understanding that is no longer worthwhile. When that time comes, it is best to remove the subclass, replacing it with a field on its superclass.

EXTRACT SUPERCLASS - if you see 2 classes doing similar things, you can take advantage of the basic mechanism of inheritance to pull their similarities together into a superclass.

COLLAPSE HIERARCHY - when refactoring a class hierarchy, you often pull and push features around. As the hierarchy evolves, you can find that a class and its parent are no longer different enough to be worth keeping separate. At that point you can merge them together.

REPLACE SUBCLASS WITH DELEGATE - instead of subclassing objects, you can create a separate, independent entity. There is a popular principle: "*Favour object composition over class inheritance*"; however, it doesn't mean "*inheritance is considered harmful*". Inheritance is a valuable mechanism that does the job most of the time without problems. So reach for inheritance first, and move to delegation when it starts to rub badly.

REPLACE SUPERCLASS WITH DELEGATE - subclassing can be done in a way that leads to confusion and complication. A classic example of mis-inheritance from the early days of objects was making a stack a subclass of a list. The idea was to reuse the list's data storage and operations; however, many additional, inapplicable methods became available on the stack. A better approach is to make the list a field of the stack and delegate the necessary operations to it.

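A minimal Python sketch of that stack example - delegation instead of inheritance, so only the stack operations are exposed:

```
class Stack:
    def __init__(self):
        self._items: list = []  # the list is a field, not a superclass

    def push(self, item) -> None:
        self._items.append(item)

    def pop(self):
        return self._items.pop()

    def __len__(self) -> int:
        return len(self._items)


stack = Stack()
stack.push(1)
assert stack.pop() == 1  # no insert()/sort()/slicing leaking in from list
```
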
================================================
FILE: books/release-it.md
================================================
[go back](https://github.com/pkardas/learning)

# Release It! Design and Deploy Production-Ready Software

Book by Michael T. Nygard (Second Edition)

- [Chapter 1: Living in Production](#chapter-1-living-in-production)
- [Chapter 2: Case Study: The Exception That Grounded an Airline](#chapter-2-case-study-the-exception-that-grounded-an-airline)
- [Chapter 3: Stabilise Your System](#chapter-3-stabilise-your-system)
- [Chapter 4: Stability Anti-patterns](#chapter-4-stability-anti-patterns)
- [Chapter 5: Stability Patterns](#chapter-5-stability-patterns)
- [Chapter 6: Case Study: Phenomenal Cosmic Powers, Itty-Bitty Living Space](#chapter-6-case-study-phenomenal-cosmic-powers-itty-bitty-living-space)
- [Chapter 7: Foundations](#chapter-7-foundations)
- [Chapter 8: Processes on Machines](#chapter-8-processes-on-machines)
- [Chapter 9: Interconnect](#chapter-9-interconnect)
- [Chapter 10: Control Plane](#chapter-10-control-plane)
- [Chapter 11: Security](#chapter-11-security)
- [Chapter 12: Case Study: Waiting for Godot](#chapter-12-case-study-waiting-for-godot)
- [Chapter 13: Design for Deployment](#chapter-13-design-for-deployment)
- [Chapter 14: Handling Versions](#chapter-14-handling-versions)
- [Chapter 15: Case Study: Trampled by Your Own Customers](#chapter-15-case-study-trampled-by-your-own-customers)
- [Chapter 16: Adaptation](#chapter-16-adaptation)
- [Chapter 17: Chaos Engineering](#chapter-17-chaos-engineering)

## Chapter 1: Living in Production

"Feature complete" doesn't mean it is "production ready". A lot of bad things can happen in production (crazy users, viruses, high traffic, ...). Production is the only place to learn how the software will respond to real-world stimuli, hence software should be delivered to production quickly and gradually.

Most software architecture and design happens in environments that are clean and distant from production.

Design and architecture decisions are also financial decisions (downtime, resource usage, ...). It is important to consider availability, capacity and flexibility when designing software. A pragmatic architect should consider the dynamics of change.

## Chapter 2: Case Study: The Exception That Grounded an Airline

A tiny programming error starts the snowball rolling downhill.

In any incident, the author's priority is always to restore service. Restoring service takes precedence over investigation. If it is possible to gather some data for postmortem analysis, that's great - unless it makes the outage longer.

The trick to restoring the service is figuring out what to target. You can always "reboot the world" by restarting every single server, layer by layer, but that's not effective. Instead, be a doctor diagnosing a disease: look at the symptoms and figure out what disease to treat.

A postmortem is like a murder mystery: there is a set of clues - some reliable, like logs, some unreliable, like comments from people. There is no corpse - the servers are up and running, and the state that caused the error no longer exists. Log analysis helped to identify the root cause.

Bugs are inevitable - how do we prevent bugs in one system from affecting everything else? We are going to look at design patterns that can prevent this type of problem from spreading.

## Chapter 3: Stabilise Your System

Enterprise software must be cynical - it expects bad things to happen and is never surprised when they do. It doesn't even trust itself; it refuses to get too intimate with other systems, because it could get hurt.

Poor stability means real costs - millions lost, for example, in lost transactions in a trading system, or reputation loss. On the other hand, good stability does not necessarily cost a lot. A highly stable design usually costs the same to implement as an unstable one.

Transaction - an abstract unit of work processed by the system.

Impulse - a rapid shock to the system. For example, a rumour about a new console causes an impulse on the manufacturer's website, as does a celebrity tweet. Things that can fracture (break) the system in the blink of an eye.

Stress - a force applied to the system over an extended period. The major dangers to a system's longevity are memory leaks and data growth, which are difficult to catch during tests. Applications never run long enough in a development environment to reveal longevity bugs.

Failures will happen; you have the ability to prepare the system for specific failures (like car engineers design crumple zones - areas designated to protect passengers by failing first). It is possible to create failure modes that protect the rest of the system. Less-coupled architectures act as shock absorbers, diminishing the effect of an error instead of amplifying it.

Terminology:

- Fault - a condition that creates an incorrect internal state in the software.
- Error - visibly incorrect behaviour, e.g. a trading system buying 10M Pokemon futures.
- Failure - an unresponsive system.

Chain of failure: triggering a fault opens the crack, faults become errors and errors provoke failures. At each step, a crack may accelerate. Tight coupling accelerates cracks.

One way to prepare for every possible failure is to look at every external call, every I/O, every use of resources, and ask WHAT IF IT: can't make the connection, takes 10 minutes to make the connection, makes the connection and then disconnects, takes 10 minutes to respond to my query, 10k requests arrive at the same time, ...?

The IT community is divided into 2 camps:

1. Make the system fault-tolerant - catch exceptions, check error codes, keep faults from becoming errors.
2. "Let it crash", so you can restart from a known good state.

## Chapter 4: Stability Anti-patterns

Antipatterns that can wreck the system - they create, accelerate or multiply cracks in the system. These bad behaviours should be avoided.

You have to set the socket timeout if you want to break out of a blocking call - for example, a request may be stuck in the listening queue for minutes or forever (see the sketch below). Network failure can hit you in 2 ways: fast (immediate exception, e.g. connection refused) or slow (dropped ACK).

A blocked thread can't process other transactions, so overall capacity is reduced. If all threads are blocked, from a practical point of view, the server is down.

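For example, in Python neither the stdlib socket nor most clients time out by default - the timeout has to be set explicitly (the host and values are placeholders):

```
import socket

sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(5.0)  # without this, connect()/recv() can block indefinitely
try:
    sock.connect(("example.com", 80))  # placeholder host
    sock.sendall(b"GET / HTTP/1.0\r\nHost: example.com\r\n\r\n")
    data = sock.recv(4096)  # raises socket.timeout after 5 s instead of hanging a thread
except socket.timeout:
    pass  # handle the slow/dead peer instead of blocking forever
finally:
    sock.close()
```
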
Sometimes not every problem can be solved at the level of abstraction where it manifests. Sometimes the causes reverberate up and down the layers. You need to know how to drill through at least two layers of abstraction to find the reality at that level in order to understand problems.

REST with JSON over HTTP is the lingua franca for services today. HTTP-based protocols have their own issues:

- the TCP connection can be accepted, but the HTTP request never responded to
- the provider may accept the connection but not read the request
- the provider may send back a response the caller doesn't know how to handle
- the provider may send back a response with a content type the caller doesn't expect or know how to handle
- the provider may claim to be sending JSON but actually be sending plain text

Treat the response as data until you have confirmed it meets your expectations.

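A sketch of that defensive stance using `requests` (the URL and expected shape are invented):

```
import requests

response = requests.get("https://provider.example.com/api/stock", timeout=5)  # placeholder URL

if response.status_code != 200:
    raise RuntimeError(f"unexpected status: {response.status_code}")
if "application/json" not in response.headers.get("Content-Type", ""):
    raise RuntimeError("provider claims JSON but sent something else")

payload = response.json()  # may still raise if the body is "JSON" in name only
if "items" not in payload:  # validate the shape before using it
    raise RuntimeError("response missing expected 'items' field")
```
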
Libraries can have bugs too - they have all the variability in quality, style, and safety that you see in any other random sampling of code.

The most effective stability patterns to combat integration point failures are *Circuit Breaker* and *Decoupling Middleware*.

BEWARE NECESSARY EVIL - every integration point will fail in some way; you need to be prepared.

PREPARE FOR MANY FORMS OF FAILURE - failure may take several forms: network errors, semantic errors, slow responses, ...

KNOW WHEN TO OPEN UP ABSTRACTIONS - debugging integration point failures usually requires peeling back a layer of abstraction.

FAILURES PROPAGATE QUICKLY - a failure in a remote system quickly becomes your problem when your code isn't defensive enough.

APPLY PATTERNS TO AVERT INTEGRATION POINT PROBLEMS - use patterns like Circuit Breaker, Timeouts, Decoupling Middleware and Handshaking - discussed later.

Horizontal scaling - adding capacity by adding more servers; fault tolerance through redundancy. Vertical scaling - scaling by building bigger and bigger servers (more cores, memory and storage).

RECOGNISE THAT ONE SERVER DOWN JEOPARDISES THE REST - a chain reaction can happen because the death of one server makes the others pick up the slack.

HUNT FOR RESOURCE LEAKS - most of the time, a chain reaction happens when the application has a memory leak.

HUNT FOR OBSCURE TIMING BUGS - race conditions can be triggered by traffic; if one server dies because of a deadlock, the increased load on the others makes them more likely to hit the deadlock too.

USE AUTOSCALING - create health checks for every autoscaling group; the scaler can shut down instances that fail their health checks and start new ones.

DEFEND WITH BULKHEADS - partition servers with Bulkheads - more details later.

Cascading failures - occur when a crack in one layer triggers a crack in a calling layer. If the caller handles errors badly, it will start to fail, resulting in a cascading failure (for example, a database failure is going to impact any system that is calling the database). Every dependency is a chance for a failure to cascade.

- a cascading failure often results from a resource pool (e.g. a connection pool) that gets exhausted; safe resource pools always limit the time a thread can wait to check out a resource
- defend with timeouts and circuit breakers

Capacity is the maximum throughput your system can sustain under a given workload while maintaining acceptable performance. Breaking limits creates cracks in the system. Limits:

- heap memory - for example with in-memory sessions, memory can get short - many things can go wrong: out-of-memory exceptions, logging that stops working. It is possible to use weak references - the garbage collector may reclaim the memory when it is running low (before an out-of-memory error occurs). Callers have to behave nicely when the payload is gone. Weak references are useful, but they do add complexity.
- off-heap memory, off-host memory - for example Redis, but this is slower than local memory and there is a problem with replication.
- the number of sockets on the server is limited - every request corresponds to an open socket, and the OS assigns inbound connections to an ephemeral port that represents the receiving side of the connection. Because of the TCP packet format, one server can have up to 64 511 connections open. How can we serve millions of concurrent connections? Virtual IP addresses.
- closed sockets can be problematic too - before a socket can be reused, it goes through a couple of states, for example as a defence against bogons. A bogon is a wandering packet that got routed inefficiently and arrives late (out of sequence); if the socket were reused too quickly, the late packet could trigger a response.

Cookies are a clever way to pass state back and forth from client to server and vice versa. They allow all kinds of new applications, such as personalised portals and shopping sites. Cookies should carry only a small amount of data, because the data needs to be encrypted, and this is a CPU-heavy task.

A session is an abstraction that makes building applications easier. All the user really sends is a series of HTTP requests; the server receives them, computes, and returns responses. Sessions are about caching data in memory.

Truly dangerous users are the ones that target your website - once you are targeted, you will almost certainly be breached.

Adding complexity to solve one problem creates the risk of entirely new failure modes, e.g. multithreading - it enables scalability but also introduces concurrency errors.

Caching can be a powerful response to a performance problem; however, caching can cause trouble - it can eat away at the memory available for the system, and when that happens the garbage collector will spend more and more time attempting to recover enough memory to process requests. You need to monitor hit rates for the cached items to see whether most items are actually being served from the cache. **Caches should be built using weak references to hold the cached item itself.** It will help the GC reclaim the memory.

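In Python, the equivalent construct is the `weakref` module - e.g. entries in a `WeakValueDictionary` vanish once nothing else references the value (the report class is invented):

```
import weakref


class ExpensiveReport:
    def __init__(self, data: str):
        self.data = data


_cache: "weakref.WeakValueDictionary[str, ExpensiveReport]" = weakref.WeakValueDictionary()


def get_report(key: str) -> ExpensiveReport:
    report = _cache.get(key)
    if report is None:
        report = ExpensiveReport(data=f"report for {key}")  # pretend this is expensive
        _cache[key] = report  # the entry disappears once no one else holds the report
    return report
```
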
Libraries are notorious sources of blocking threads.

Self-Denial Attack - any situation in which the system conspires against itself. For example, a coupon code sent to 10k users to be used on a certain date is going to attract millions of users (like an XBOX preorder). Self-denial can be avoided by building a shared-nothing architecture (no shared databases or other resources) - ideal horizontal scaling. Talk to the marketing department about when they are going to send out mass emails - you will be able to pre-scale (prepare some additional instances for the increased load). Also be careful with open links to resources, and watch out for Fight Club bugs - increased front-end load causes exponentially increasing backend processing.

With point-to-point connections, each instance has to talk directly to every other instance - this means O(n^2) scaling - be careful. Point-to-point communication can be replaced by: UDP broadcasts, TCP/UDP multicast, pub/sub messaging, message queues. XP principle: do the simplest thing that will work.

Watch out for shared resources - they can be a bottleneck. Stress-test them heavily, and be sure clients will keep working despite a malfunctioning resource.

The frontend always has the ability to overwhelm the backend, because their capacities are not balanced. However, you can not build every service to be large enough to serve enormous load from the frontend - instead you must build services to be resilient in the face of a tsunami of requests (e.g. Circuit Breaker, Handshaking, Back-pressure, Bulkheads).

Dog-pile - when a bunch of servers impose transient load all at once (a term from American football). It can occur: when booting all servers at once, on a cron job, or when the config management pushes out a change. Use a random clock slew to diffuse the demand from cron jobs (every instance does something at a different time). Use a backoff algorithm so every client retries at a different time.

Infrastructure management tools can cause a lot of trouble (e.g. the Reddit outage) - build limiters and safeguards into them, so they won't destroy the entire system at once.

A slow response is worse than refusing a connection or returning an error, because it ties up resources in both the calling system and the called system. Slow responses usually result from excessive demand. The system should have the ability to monitor its own performance, so it can also tell when it isn't meeting its SLAs (service-level agreements). Why slow responses are dangerous: they trigger cascading failures, and users hitting the *reload* button cause even more traffic to the already overloaded system. If the system tracks its own responsiveness, it can tell when it is getting slow - in such a situation the developer should consider sending an immediate error response.

> Design with scepticism, and you will achieve resilience. Ask "What can system X do to hurt me?" and then design a way to dodge whatever wrench your supposed ally throws.

Use realistic data volumes - typical development and test data sets are too small to exhibit problems; you need production-size data to see what happens when your query returns a million rows that you turn into objects. Calls should be paginated. Do not rely on data providers - one day they will go *berserk* and fill up a table for no reason.

## Chapter 5: Stability Patterns

Healthy patterns to reduce, eliminate or mitigate the effects of cracks in the system. Apply patterns wisely to reduce the damage done by an individual failure.

TIMEOUTS - today every application is a distributed system, and every system must grapple with the fundamental nature of networks - they are fallible. When any element breaks, code can't wait forever for a response that may never come. *Hope is not a design method.* A timeout is a simple mechanism allowing you to stop waiting for an answer once you think it will not come. Well-placed timeouts provide fault isolation - **a problem in some other service does not have to become your problem**. Timeouts can also be relevant within a single service: any resource pool can be exhausted, and any resource that blocks threads must have a timeout to ensure that calling threads eventually unblock. Timeouts are often found in the company of retries, and fast retries are very likely to fail again (wait between retries).

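A sketch of such a retry policy (the numbers are arbitrary): exponential backoff plus random jitter, so clients back off and don't retry in lockstep:

```
import random
import time


def call_with_retries(call, attempts: int = 4):
    for attempt in range(attempts):
        try:
            return call()
        except Exception:
            if attempt == attempts - 1:
                raise  # out of attempts - let the failure surface
            # exponential backoff with jitter: every client retries at a different time
            time.sleep((2 ** attempt) + random.uniform(0, 1))
```
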
BULKHEADS - in a ship, bulkheads prevent water from moving from one compartment to another. You can apply the same technique: by partitioning the system, you can keep a failure in one part from destroying everything. This can be achieved, for example, by running the application on multiple servers - if one fails, we still have redundancy (e.g. instances across zones and regions in AWS). A bulkhead partitions capacity to preserve partial functionality when bad things happen. Granularity should be picked carefully - thread pools in the application, CPUs, servers in a cluster. Bulkheads are especially useful in service-oriented or microservice architectures, to prevent a chain reaction from taking the entire company down.

STEADY STATE - every time a human touches a server, it is an opportunity for unforced errors. It is best to keep people off production systems to the greatest extent possible. People should treat servers as "cattle", not "pets" - they should not be logged in to a server all the time watching whether everything is fine. The Steady State pattern says that for every mechanism that accumulates a resource (log files, rows in the database, caches in memory), some other mechanism must recycle that resource. Several types of sludge that can accumulate, and how to avoid the need for fiddling:

- data purging - easy to do, however it can be nasty; especially in relational databases there is a risk of leaving orphaned rows, and you need to make sure the application still works once the data is gone.
- log files - logs are a valuable source of information, but left unchecked they are a risk. When logs fill up the filesystem, they jeopardise stability. Configure log file retention based on size. Probably the best you can do is store logs on a centralised server (especially if you are required to keep logs for years because of a compliance regime). Logstash - a centralised server for logs, where they can be indexed, searched and monitored.
- in-memory caching - improper use of caching is a major cause of memory leaks, which in turn lead to horrors like daily server restarts. Limit the amount of memory a cache can consume (see the sketch below).
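A minimal sketch of a capped cache in Python (illustrative, not from the book). It bounds the number of entries LRU-style and tracks the hit rate the chapter says to monitor; for values that support weak references, `weakref.WeakValueDictionary` could hold them instead, letting the GC reclaim entries under memory pressure:

```python
from collections import OrderedDict

class BoundedCache:
    """LRU cache with a hard cap, so it cannot eat all available memory."""

    def __init__(self, max_entries=1024):
        self.max_entries = max_entries
        self._items = OrderedDict()
        self.hits = 0
        self.misses = 0  # monitor the hit rate to see if the cache earns its keep

    def get(self, key):
        try:
            value = self._items[key]
        except KeyError:
            self.misses += 1
            return None
        self._items.move_to_end(key)  # mark as most recently used
        self.hits += 1
        return value

    def put(self, key, value):
        self._items[key] = value
        self._items.move_to_end(key)
        while len(self._items) > self.max_entries:
            self._items.popitem(last=False)  # evict the least recently used entry
```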
Steady State encourages better operational discipline by limiting the need for system administrators to log on to production servers.

FAIL FAST - if the system can determine in advance that it will fail at an operation, it is always better to fail fast - the caller doesn't waste its capacity waiting. No, you don't need a Deep Learning team to tell whether it will fail. Example: if a call requires a database connection, the application can quickly check whether the database is available. Another approach is to configure the load balancer appropriately (no available servers - reject the request). Use request validation to know whether the data is correct. The Fail Fast pattern improves overall system stability by avoiding slow responses.

LET IT CRASH - there is no way to test everything or predict all the ways a system can break. We must assume that errors will happen. There must be a boundary for the crashiness: we want to crash a component in isolation, and the rest of the system must protect itself from a cascading failure. In a microservice architecture, a whole instance of the service might be the right granularity. We must be able to get back to a clean state and resume normal operation as quickly as possible - otherwise we will see performance degradation. Supervisors need to keep close track of how often they restart child processes; it might be necessary to restart the supervisor itself. A high number of restarts can indicate either that the state is not sufficiently cleaned up, or that the system is in jeopardy and the supervisor is just masking the underlying problem. The final element of "let it crash" is reintegration - the instance must somehow be able to rejoin the pool and accept work. This can be done through health checks at the instance level.

HANDSHAKING - most valuable when unbalanced capacities are leading to slow responses. If the server can detect that it will not be able to meet its SLAs, it should have some means to ask the caller to back off. This is an effective way to stop cracks from jumping layers, as in the case of a cascading failure. The application can notify the load balancer through a health check that it cannot take more requests (503 Service Unavailable); the load balancer then knows not to send any additional work to that particular server.

TEST HARNESSES - you can create test harnesses to emulate the remote system on the other end of each integration point. A good test harness should be as nasty and vicious as real-world systems will be. A test harness runs as a separate server, so it is not obliged to conform to the defined interface - it can provoke network errors, protocol errors or application-level errors. Consider building a test harness that substitutes for the remote end of every web service call. Integration testing environments are good at examining failures only in the seventh layer of the OSI model (the application layer) - and not even all of those. The test harness can be designed like an application server - it can have pluggable behaviour for the tests that are related to the real application. Broadly speaking, a test harness leads toward "chaos engineering". The Test Harness pattern augments other testing methods; it does not replace unit tests, acceptance tests, penetration tests and so on.
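A tiny sketch of one "nasty" harness behaviour in Python (the port is arbitrary): a black-hole server that accepts TCP connections and never answers, which quickly exposes callers that hit integration points without timeouts:

```python
import socket

def run_black_hole_server(port=10200):
    """Accept connections, read nothing, answer nothing - forever."""
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listener.bind(("0.0.0.0", port))
    listener.listen(5)
    held_open = []  # keep sockets referenced so callers stay blocked
    while True:
        conn, addr = listener.accept()
        print(f"holding connection from {addr} open indefinitely")
        held_open.append(conn)

if __name__ == "__main__":
    run_black_hole_server()
```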
DECOUPLING MIDDLEWARE - middleware is a graceless name for tools that inhabit a singularly messy space: integrating systems that were never meant to work together. It is the connective tissue that bridges gaps between different islands of automation. Middleware integrates systems by passing data and events back and forth between them, and decouples them by letting the participating systems remove specific knowledge of, and calls to, the other systems. Tightly coupled middleware amplifies shocks to the system; synchronous calls are particularly vicious amplifiers that facilitate cascading failures (this includes JSON over HTTP). Message-oriented middleware decouples the endpoints in both space and time, because the requesting system doesn't just sit around and wait for a reply. This form of middleware cannot produce a cascading failure.

SHED LOAD - applications have zero control over their demand; at any moment, more than a billion devices could make a request. Services should model TCP's approach: when load gets too high, start to refuse new requests for work. This is related to Fail Fast. The ideal way to define "load is too high" is for a service to monitor its own performance relative to its SLA. When requests take longer than the SLA allows, it is time to shed some load.

CREATE BACK PRESSURE - every performance problem starts with a queue backing up somewhere. If a queue is unbounded, it can consume all available memory, and as the queue's length heads toward infinity, response time does too. Blocking the producer is a kind of flow control - it allows the queue to apply "back pressure" upstream. Back pressure propagates all the way to the ultimate client, who is throttled down in speed until the queue releases. TCP uses back pressure - once the window is full, senders are not allowed to send anything until released.

GOVERNOR - machines are great at performing repetitive tasks; humans are great at perceiving the high-level situation. In the 18th century, steam engineers discovered it is possible to run machines so fast that the metal breaks. The solution was the governor - a device that limits the speed of an engine. We can create governors to slow down the rate of actions. A governor is stateful and time-aware; it knows what actions have been taken over a period of time. (Reddit uses a governor to slow down the autoscaler, with logic that says it can only shut down a certain percentage of instances at a time.) The whole point of a governor is to slow things down enough for humans to get involved.

## Chapter 6: Case Study: Phenomenal Cosmic Powers, Itty-Bitty Living Space

Launching a new site is like having a baby. You must expect certain things, such as being awakened in the middle of the night. Monitoring technology provides a great safety net, pinpointing problems when they occur, but nothing beats the pattern-matching power of the human brain.

Response time is always a lagging indicator. You can only measure the response time of requests that are done, so whatever your worst response time may be, you can't measure it until the slowest request finishes. Requests that didn't complete never get averaged in.

Recovery-Oriented Computing - principles:

- Failures are inevitable, in both hardware and software.
- Modeling and analysis can never be sufficiently complete. A priori prediction of all failure modes is not possible.
- Human action is a major source of system failures.

Investigations aim to improve survivability in the face of failures. The ability to restart single components, instead of entire servers, is a key concept of recovery-oriented computing.
## Chapter 7: Foundations

Designing for production means thinking about production issues as first-class concerns (network, logging, monitoring, runtime control, security, the people who do operations). There are several layers of concern:

1. Operations - security, availability, capacity, status, communication
2. Control Plane - system monitoring, deployment, anomaly detection, features
3. Interconnect - routing, load balancing, failover, traffic management
4. Instances - services, processes, components, instance monitoring
5. Foundation - hardware, VMs, IPs

Virtualization promised developers a common hardware appearance across the bewildering array of physical configurations in the data centre. On the downside, performance is much less predictable. Many virtual machines can reside on the same physical host. It is rare to move VMs from one host to another. When designing applications to run in virtual machines, you must make sure they are not sensitive to the loss or slowdown of the host. A clock on a VM is not monotonic and sequential, because the VM can be suspended for an indefinite span of real time. The bottom line: don't trust the OS clock. If external time is important, use an external source like a local NTP server.

Containers have short-lived identity. As a result, a container should not be configured on a per-instance basis. A container won't have much, if any, local storage, so the application must rely on external storage for files, data, and maybe even cache. When you design an application for containers, keep a few things in mind: the whole container image moves from environment to environment, so the image can't hold things like production database credentials. Containers should not contain hostnames or port numbers, because those settings need to change dynamically while the container image stays the same. Containerised applications need to send their telemetry out to a data collector.

The 12-Factor App [12factor.net] - created by engineers at Heroku, a succinct description of a cloud-native, scalable, deployable application:

1. Codebase - track one codebase in revision control. Deploy the same build to every environment.
2. Dependencies - explicitly declare and isolate dependencies.
3. Config - store config in the environment (see the sketch after this list).
4. Backing services - treat backing services as attached resources.
5. Build, release, run - strictly separate build and run stages.
6. Processes - execute the app as one or more stateless processes.
7. Port binding - export services via port binding.
8. Concurrency - scale out via the process model.
9. Disposability - maximise robustness with fast startup and graceful shutdown.
10. Dev/prod parity - keep development, staging and production as similar as possible.
11. Logs - treat logs as event streams.
12. Admin processes - run admin/management tasks as one-off processes.
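A minimal sketch of factor 3 in Python (the variable names are illustrative): the same build reads its per-environment settings from the environment instead of baking them into the image:

```python
import os

# Per-environment settings come from the environment, never from the image.
DATABASE_URL = os.environ["DATABASE_URL"]  # required - fail fast at startup if missing
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379")  # optional, with a default
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

if __name__ == "__main__":
    print(f"connecting to {DATABASE_URL}, logging at {LOG_LEVEL}")
```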
## Chapter 8: Processes on Machines

Service - a collection of processes across machines that work together to deliver a unit of functionality. Instance - an installation on a single machine out of a load-balanced array of the same executable. Executable - an artefact that a machine can launch as a process, created by the build process. Process - an operating system process running on a machine. Installation - the executable and any attendant directories, configuration files and other resources. Deployment - the act of creating an installation on a machine.

Developers should not do production builds from their own machines. Developer boxes are hopelessly polluted - we install all kinds of junk on these systems, play games and visit sketchy websites. Only make production builds on a CI server, and have it put the binary into a safe repository that nobody else can write into.

Configuration management tools like Chef, Puppet and Ansible are all about applying changes to running machines. They use scripts, playbooks or recipes to transition a machine from one state to a new state. We don't want our instance binaries to change per environment, but we do want their properties to change. That means the code should look outside the deployment directory to find per-environment configuration. ZooKeeper and etcd are popular choices for a configuration service - but any outage of these systems can cause a lot of trouble.

Shipboard engineers can tell when something is about to go wrong by the sound of the giant diesel engines. We must facilitate that awareness by building transparency into our systems. Transparency refers to the qualities that allow operators, developers and business sponsors to gain understanding of the system's historical trends, present conditions, instantaneous state and future projections. Debugging a transparent system is vastly easier, so transparent systems mature faster than opaque ones. A system without transparency cannot survive long in production. Transparency arises from deliberate design and architecture.

Instances should log their health and events to a plain old text file - any log scraper can collect these without disturbing the server process. Logging is certainly a white-box technology; it must be integrated pervasively into the source code. Not every exception needs to be logged as an error - just because a user entered a bad card number and the validation component threw an exception doesn't mean anything has to be done about it. Log errors in business logic or user input as WARNINGs; reserve ERROR for serious system problems. Logs have to present clear, accurate and actionable information to the humans who read them. Messages should include an identifier that can be used to trace the steps of a transaction.

Health checks should be more than just "yup, it is running" - they should report at least: IP, interpreter version, application version, whether the instance is accepting work, and the status of connection pools, caches and circuit breakers. The load balancer can use the health check for the "go live" transition too: when the health check on a new instance goes from failing to passing, the app is done with its startup.
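A minimal sketch of such a health check endpoint, here with Flask (an assumption - the book does not prescribe a framework, and the field names are illustrative):

```python
import sys

from flask import Flask, jsonify

app = Flask(__name__)
APP_VERSION = "1.4.2"  # illustrative; normally stamped in by the build pipeline

@app.route("/health")
def health():
    accepting_work = True  # flip to False while draining before shutdown
    status = {
        "interpreter": sys.version.split()[0],
        "version": APP_VERSION,
        "accepting_work": accepting_work,
        "db_pool": {"in_use": 3, "max": 20},        # report real pool stats here
        "circuit_breakers": {"payments": "closed"},
    }
    # 503 tells the load balancer to stop sending work (the Handshaking pattern)
    return jsonify(status), 200 if accepting_work else 503

if __name__ == "__main__":
    app.run(port=8080)
```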
## Chapter 9: Interconnect

The interconnect layer covers all the mechanisms that knit a bunch of instances together into a cohesive system: traffic management, load balancing and discovery. This is the layer where we can really create high availability.

Consul - a dynamic discovery service, suited for large teams with hundreds of small services. On the other hand, a small business with just a few developers would probably stick with direct DNS entries. DNS might be the best choice for small teams, particularly in a slowly changing infrastructure. When using DNS, it is important to call a logical service name rather than a physical hostname - even if that logical name is just an alias to the underlying host, it is still preferable. DNS round-robin is an easy approach to load balancing, but it suffers from putting too much control in the client's hands. A DNS outage can be serious, so DNS should not be hosted on the same infrastructure as the production system. There should be more than one DNS provider, with servers in different locations.

Almost everything we build today uses horizontally scalable farms of instances that implement request/reply semantics. Horizontal scaling helps with overall capacity and resilience, but it introduces the need for load balancing. Load balancing is all about distributing requests across a pool of instances to serve all requests correctly in the shortest feasible time.

Software load balancing - a low-cost approach that uses an application to listen for requests and dole them out across the pool of instances. This is basically a reverse proxy (a proxy multiplexes many outgoing calls into a single source IP address; a reverse proxy demultiplexes calls coming into a single IP address and fans them out to multiple addresses). Examples: Squid, HAProxy, Apache httpd, nginx.

Hardware load balancing - specialised network devices that serve a similar role to the reverse proxy server. They provide better capacity and throughput because they operate closer to the network.

One of the most important services a load balancer can provide is health checks - the load balancer will not send traffic to an instance that fails a certain number of health checks. Load balancers can also direct repeated requests to the same instance ("sticky sessions"). This helps with stateful services, like user session state in an application server: directing the same user's requests to the same instance gives better response time, because the necessary resources are already in that instance's memory. A downside of sticky sessions is that they can prevent load from being distributed evenly. Another useful way to employ a load balancer is content-based routing - for example, search requests may go to one set of instances, while user-signup requests go somewhere else.

Demand control - when, where and how to refuse work under heavy demand.

> Every failing system starts with a queue backing up somewhere.

Going nonlinear - a service slowing down under heavy load means fewer and fewer sockets are available to receive requests exactly when the most requests are coming in. Load shedding - under high load, turning away work the system can't complete in time - is the most important way to control incoming demand. We want to shed load as early as possible, to avoid tying up resources in several tiers before rejecting a request. A service should measure its own response time and report it in its health check.

Service discovery - services can announce themselves to begin receiving load. A caller needs to know at least one IP address to contact a particular service. Service discovery is itself another service: it can fail or get overloaded. Service discovery can be built on top of a distributed data store such as ZooKeeper or etcd. In CAP terms, ZooKeeper is a CP system - when there is a network partition, some nodes will not answer queries or accept writes. HashiCorp's Consul resembles ZooKeeper, but Consul's architecture places it in the AP camp - it prefers to remain available and risk stale information when a partition occurs.

## Chapter 10: Control Plane

The control plane encompasses all the software and services that run in the background to make production load successful. One way to think about it: if production user data passes through it, it is production software; if its main job is to manage other software, it is control plane. Every part of the control plane is optional, if you are willing to make trade-offs.
- for example: logging and monitoring help with postmortem analysis; without them, all of that takes longer or simply doesn't get done.

Mechanical advantage is the multiplier on human effort that simple machines provide. With mechanical advantage, a person can move something much heavier than themselves. It works for good or for ill - high leverage allows a person to make large changes with less effort.

Every postmortem review has three important jobs to do: explain what happened, apologise, and commit to improvement.

Automation has no judgement. When it goes wrong, it tends to do so really, really quickly. By the time a human perceives the problem, it is a question of recovery rather than intervention. We should use automation for the things humans are bad at: repetitive tasks and fast response. We should use humans for the things automation is bad at: perceiving the whole situation at a higher level.

A monitoring team should be responsible for providing monitoring tools - offering a monitoring service to customers. Log collectors can work in push mode (the instance pushes logs over the network - helpful with containers) or pull mode (the collector runs on a central machine and reaches out to all known hosts to remote-copy the logs). Getting all the logs onto one host is a minor achievement; the real beauty comes from indexing the logs - then you can search for patterns, make trend-line graphs and raise alerts when bad things happen. This can be done using Elasticsearch, Logstash and Kibana.

Categories of metrics that can be useful:

- Traffic indicators - page requests, transaction count
- Business transactions, for each type - number processed, number aborted, conversion rate
- Users - demographics, number of users, usage patterns, errors encountered
- Resource pool health - enabled state, total resources, number of resources created, number of blocked threads
- Database connection health - number of SQLExceptions thrown, number of queries, average response time
- Data consumption - number of rows present, footprint in memory and on disk
- Integration point health - state of circuit breaker, number of timeouts, number of requests, average response time, number of good responses, number of network/protocol errors, actual IP address
- Cache health - items in cache, memory used by cache, cache hit rate, items flushed by garbage collector

Canary deployment - a small set of instances that get the new build first. For a period of time, instances running the new build coexist with instances running the old build. The purpose of the canary deployment is to reject a bad build before it reaches the users.

The net result is that GUIs make terrible administrative interfaces for long-term production operation. The best interface for long-term operation is the command line - given a command line, operators can easily build a scaffolding of scripts, logging and automated actions to keep your software happy.

## Chapter 11: Security

Security must be baked in; it is not a seasoning to sprinkle onto your system at the end. You are responsible for protecting your consumers and your company.

OWASP Top 10 - catalogued application security incidents and vulnerabilities. The Top 10 list represents a consensus about the most critical web application security flaws:

1. Injection - an attack on a parser or interpreter that relies on user-supplied input. The classic example is SQL injection - it happens when code smashes strings together to make queries, even though every SQL library allows the use of placeholders in query strings (see the sketch below).
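A minimal sketch with Python's standard-library `sqlite3` (schema and data are invented for illustration) - the placeholder version sends user input as data, never as SQL text:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE users (name TEXT, email TEXT)")
conn.execute("INSERT INTO users VALUES ('alice', 'alice@example.com')")

user_input = "alice' OR '1'='1"  # a classic injection payload

# Vulnerable: user input becomes part of the SQL text itself.
# query = f"SELECT email FROM users WHERE name = '{user_input}'"  # DON'T

# Safe: the library binds user_input as a parameter.
rows = conn.execute("SELECT email FROM users WHERE name = ?", (user_input,)).fetchall()
print(rows)  # [] - the payload matches nothing instead of dumping the table
```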
Keep in mind that "*comes from a user*" doesn't only mean the input arrived just now in an HTTP request - data from a database may have originated from a user as well. XML parsers are vulnerable too (XXE injection).

2. Broken Authentication and Session Management - at one time it was common to use query parameters on URLs and hyperlinks to carry session IDs. Not only are those IDs visible to every switch, router and proxy server, they are also visible to humans - anyone who copies and pastes a link from their browser shares their session. Session hijacking is especially dangerous when the session is stolen from an administrator. OWASP suggests the following guidelines for handling session IDs:
   1. Use long session IDs with lots of entropy
   2. Generate session IDs using a pseudorandom number generator with good cryptographic properties (`rand` is not a good choice)
   3. Protect against XSS to avoid script execution that would reveal the session ID
   4. When a user authenticates, generate a fresh session ID
   5. Keep up to date with security patches and versions - too many systems run outdated versions with known vulnerabilities
   6. Use cookies to exchange session IDs; do not accept session IDs via other mechanisms

*Authentication* means we verify the identity of the caller - is the caller who he or she claims to be? Some dos and don'ts:
   1. Don't keep passwords in your database
   2. Never email a password to a user as part of a "*forgotten password*" process
   3. Do apply a strong hash algorithm to passwords. Use a "*salt*" - some random data added to the password to make dictionary attacks harder (see the sketch after this list)
   4. Do allow users to enter overly long passwords
   5. Do allow users to paste passwords into GUIs
   6. Do plan on rehashing passwords at some point in the future - we have to keep increasing the strength of our hash algorithms. Make sure you can change the salt too
   7. Don't allow attackers to make unlimited authentication attempts
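A minimal sketch of guideline 3 using only Python's standard library (the iteration count is illustrative - raise it as hardware improves): a fresh random salt plus a deliberately slow hash, stored together:

```python
import hashlib
import hmac
import secrets

ITERATIONS = 600_000  # illustrative; increase over time

def hash_password(password: str) -> tuple[bytes, bytes]:
    salt = secrets.token_bytes(16)  # fresh random salt per user
    digest = hashlib.pbkdf2_hmac("sha256", password.encode(), salt, ITERATIONS)
    return salt, digest  # store both; never the password itself

def verify_password(password: str, salt: bytes, digest: bytes) -> bool:
    candidate = hashlib.pbkdf2_hmac("sha256", password.encode(), salt, ITERATIONS)
    return hmac.compare_digest(candidate, digest)  # constant-time comparison

salt, digest = hash_password("correct horse battery staple")
assert verify_password("correct horse battery staple", salt, digest)
assert not verify_password("guess", salt, digest)
```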
3. Cross-Site Scripting - happens when a service renders a user's input directly into HTML without escaping it; it is related to injection attacks. The bottom line: never trust input - scrub it on the way in and escape it on the way out. Don't build structured data by smashing strings together.

4. Broken Access Control - application problems that allow attackers to access data they shouldn't. One common form is "*direct object access*" - when a URL contains something like a database ID as a query parameter. The solution is to reduce the value of URL probing and to check authorisation to objects in the first place: generate unique but non-sequential identifiers, or use a generic URL that is session-sensitive (`/users/123` -> `/users/me`). Rule of thumb: *if a caller is not authorised to see the contents of a resource, it should be as if the resource doesn't even exist* (`404` instead of `403`). When a request involves a file upload, the caller could overwrite any file the service is allowed to modify - the only safe way to handle file uploads is to treat the client's filename as an arbitrary string to store in a database field. Don't build a path from the filename in the request.

5. Security Misconfiguration - default passwords are a serious problem. Security misconfiguration usually takes the form of omission: servers enable unneeded features by default, and admin consoles are a common source of problems. Another common misconfiguration is servers listening too broadly. You can improve information security right away by splitting internal traffic onto its own NIC, separate from public-facing traffic. Make sure every administrator uses a personal account, not a group account. Go ahead and add some logging to those administrative and internal calls.

6. Sensitive Data Exposure - credit cards, medical records, insurance files, purchasing data, emails - all valuable things people can steal from you or use against you. Hackers don't attack your strong points; they look for cracks in your shell. It can be as simple as an employee's stolen laptop with a database extract in a spreadsheet. Some guidelines:
   1. Don't store sensitive information that you don't need
   2. Use HTTP Strict Transport Security - it prevents clients from negotiating their way down to insecure protocols
   3. Stop using SHA-1
   4. Never store passwords in plain text
   5. Make sure sensitive data is encrypted in the database
   6. Decrypt data based on the user's authorisation, not the server's

Consider using AWS Key Management Service - applications request data encryption keys, which they use to encrypt or decrypt data. HashiCorp Vault is an alternative to AWS KMS.

7. Insufficient Attack Protection - always assume that attackers have unlimited access to other machines behind the firewall. Services do not typically track illegitimate requests by their origin, and they do not block callers that issue too many bad requests - which allows an attacking program to keep making calls. API gateways are a useful defence here: an API gateway can block callers by their API key and throttle their request rate. Normally this helps preserve capacity; in the case of an attack, it slows the rate of data compromise, thereby limiting the damage.

8. Cross-Site Request Forgery - used to be a bigger issue than it is now. A CSRF attack starts on another website: an attacker uses a web page with JS, CSS or HTML that includes a link to your system, and when the hapless user's browser accesses your system, your system thinks it is a valid request from that user. Make sure that requests with side effects (password change, mailing address update, purchases) use anti-CSRF tokens - extra fields containing random data that your system emits when rendering a form. Most frameworks today do this for you. You can also tighten up your cookie policy with the "*SameSite*" attribute, which makes the browser send the cookie only if the document's origin is the same as the target's origin. SameSite cookies may require changes to your session management approach.

9. Using Components with Known Vulnerabilities - most successful attacks are not the exciting "*zero day, rush to patch before they get it*" kind. Most attacks are mundane; it is important to keep applications up to date.

10. Underprotected APIs - it is essential to make sure APIs are not misused. APIs must ensure that a malicious request cannot access data the original user would not be able to see. An API should use the most secure means available to communicate. Make sure the parser is hardened against malicious input - fuzz-testing APIs is especially important.

The principle of Least Privilege - a process should have the lowest level of privilege needed to accomplish its task. Anything application services need to do, they should do as non-administrative users. Containers provide a nice degree of isolation from each other - instead of creating multiple application-specific users on the host operating system, you can package each application into its own container.
Configured Passwords - at the absolute minimum, passwords to production databases should be kept separate from any other configuration files. Password vaulting keeps passwords in encrypted files, which reduces the security problem. AWS Key Management Service is useful here: with KMS, applications use API calls to acquire decryption keys, so the encrypted data doesn't sit in the same storage as the decryption keys.

Frameworks can't protect you from the Top 10, and neither can a one-time review by your company's application security team. Security is an ongoing activity - it must be part of the system's architecture, and you must have a process to discover attacks.

## Chapter 12: Case Study: Waiting for Godot

## Chapter 13: Design for Deployment

How to design applications for easy rollout - packaging, integration point versioning and database schema. Once upon a time, we wrote our software, zipped it up and threw it over the wall to operations, so they could deploy it. Operations would schedule some *planned* downtime to execute the release. HOWEVER, users should not care about downtime - the application should be updated without them knowing about the release.

Most of the time, we design for the state of the system after a release, assuming the whole system can be changed in some instantaneous quantum jump. Instead, we have to treat deployment as a feature. Three key concerns: automation, orchestration and zero-downtime deployment.

AUTOMATED DEPLOYMENTS - the build pipeline is the first tool of interest. It picks up after someone commits a change to version control. Build pipelines are often implemented with CI servers. CI would stop after publishing a test report and an archive; the build pipeline goes beyond that - it runs a series of steps that culminate in a production deployment (deploy code to a trial environment, run migrations, perform integration tests). Each stage of the build pipeline is looking for reasons to reject the build - failed tests, lint complaints, failed integrations. Tools: Jenkins, GoCD, Netflix Spinnaker, AWS CodePipeline. Do not look for the best tool - pick one that suffices and get good with it. Avoid the analysis trap. At the end of the build pipeline, the build server interacts with one of the configuration management tools.

Between the time a developer commits code to the repository and the time it runs in production, code is a pure liability. It may have unknown bugs, it may break scaling or cause production downtime, it might be a great implementation of a feature nobody wants. The idea of continuous deployment is to reduce that delay as much as possible, to reduce the liability of undeployed code. A bigger deployment with more changes is definitely riskier. "*If it hurts, do it more often*" - do everything continuously; for the build pipeline this means running the full build on every commit.

Shim - a thin piece of wood that fills a gap where two structures meet. In deployments, a shim is a bit of code that helps join the old and new versions of the application. For example, when migrating a database, old instances read from the old table while new instances read from the new table. Shims can be implemented with SQL triggers - an insert into one table is propagated to the other.

[MUTABLE INFRASTRUCTURE] We typically update machines in batches. Choose how to divide your machines into equal-sized groups - suppose we have five: Alpha, Bravo, Charlie, Delta, Foxtrot. The rollout would go like this:

1. Instruct Alpha to stop accepting new requests
2. Wait for load to drain from Alpha
3. Run the configuration management tool to update code and config
4. Wait for green health checks on all machines in Alpha
5. Instruct Alpha to start accepting requests
6. Repeat the process for Bravo, Charlie, Delta, Foxtrot

The first group should be the canary group - pause there to evaluate the build before moving on to the next group. Use traffic shaping at your load balancer to gradually ramp traffic up to the canary group while watching the metrics for anomalies. Every application should include an end-to-end health check.

[IMMUTABLE INFRASTRUCTURE] To roll code out here, we don't change the old machines - we spin up new machines running the new version of the code. Machines can be started in the existing cluster or in a new cluster. With frequent deployments, you are better off starting new machines in the existing cluster; that avoids interrupting open connections when switching between clusters. Be careful about caches and sessions. Remember the post-rollout cleanup - drop old tables, views, columns, aliases, ...

DEPLOY LIKE THE PROS - deployments today are frequent and should be seamless. The boundary between operations and development has become fractal. Designing for deployment gives you the ability to make large changes in small steps. This all rests on a foundation of automated action and quality checking. The build pipeline should be able to apply all the accumulated wisdom of your architects, developers, designers, testers and DBAs. Software should be designed to be deployed easily. Zero downtime is the objective. Smaller, easier deployments mean you can make big changes over a series of small steps - that reduces disruption to your users.

## Chapter 14: Handling Versions

It is better for everyone if we do some extra work on our end to maintain compatibility, rather than pushing migration costs onto other teams. How can your software be a good citizen? Each consuming application has its own development team that operates on its own schedule. If you want others to respect your autonomy, then you must respect theirs - you can't force consumers to match your release schedule. Trying to coordinate consumer and provider deployments doesn't scale.

TCP specification (Postel's Robustness Principle):

> Be conservative in what you do, be liberal in what you accept from others.

Consumer and provider must share a number of agreements in order to communicate: connection handshaking and duration, request framing, content encoding, message syntax, message semantics, authorisation and authentication. Postel's Robustness Principle can be seen as a Liskov Substitution Principle for interfaces: we can always accept more than we accepted before, but we cannot accept less or require more; we can return more than we returned before, but we cannot return less.

Handling breaking changes - the best approach is to add a version discriminator to the URL. This is the most common approach. You have to support both the old and the new version for some period of time, operating side by side - this allows consumers to upgrade as they are able. Internally you want to avoid duplication, so handle this in the controller: methods that handle the new API go directly to the most current version of the business logic, while methods that handle the old API convert old objects to current ones on requests and convert new objects back to old ones on responses (see the sketch below).
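A minimal sketch of that controller-level conversion in Python (endpoint and field names are invented for illustration): v2 is the current business logic, and the v1 handler only translates payloads at the boundary:

```python
def create_user_v2(payload: dict) -> dict:
    """Current business logic - knows only the new shape."""
    full_name = payload["full_name"]
    return {"id": 42, "full_name": full_name}

def create_user_v1(payload: dict) -> dict:
    """Old API kept alive: translate at the edges, share the core logic."""
    # v1 clients send separate first/last fields - convert on the way in.
    new_payload = {"full_name": f"{payload['first_name']} {payload['last_name']}"}
    result = create_user_v2(new_payload)
    # ...and convert the response back to the old shape on the way out.
    first, _, last = result["full_name"].partition(" ")
    return {"id": result["id"], "first_name": first, "last_name": last}

print(create_user_v1({"first_name": "Ada", "last_name": "Lovelace"}))
print(create_user_v2({"full_name": "Ada Lovelace"}))
```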
When receiving requests or messages, your application has no control over the format. The same goes for calling out to other services - the other endpoint can start rejecting your requests at any time. After all, they may not observe the same safety rules we just described. Always be defensive.

## Chapter 15: Case Study: Trampled by Your Own Customers

Conway's Law:

> If you have four teams working on a compiler, you will get a four-pass compiler.

Conway argues that two people must - in some fashion - communicate about the specification for an interface. If the communication does not occur, the interface cannot be built. Sometimes when you ask questions but don't get answers, it means nobody knows the answers. At other times, it means nobody wants to be seen answering the questions.

Load testing is about defining a test plan, creating some scripts, and configuring the load generators and test dispatchers. Tests are often prepared wrongly - the real world is crude and rude: there are scrapers not respecting your cookie policy, search engines indexing your website, users doing weird stuff. Most websites have terms and conditions stating "*By viewing this page you agree to ...*" - with this you can sue, or at least block, the sources of bots hitting your website millions of times.

## Chapter 16: Adaptation

To make a change, your company has to go through a decision cycle - plan -> do -> check -> act. In small companies this communication may involve just one or two people; in larger companies, an entire committee. Getting around the cycle faster makes you more competitive - this drives the "*fail fast*" motto for startups. Agile and lean development methods helped remove delay from the "act" part; DevOps removes even more, and offers tons of new tools to help with observation.

Thrashing - happens when an organisation changes direction without taking the time to receive, process and incorporate feedback. You may recognise it as constantly shifting development priorities or an unending series of crises. It creates team confusion, unfinished work and lost productivity. To avoid thrashing, try to create a steady cadence of delivery and feedback.

The platform team should not implement all your specific monitoring rules - instead, this team should provide an API that lets you install your monitoring rules into the monitoring service provided by the platform.

> If your developers only use the platform because it is mandatory, then the platform is not good enough.

The Fallacy of the DevOps Team - in larger companies it is common to find a group called the DevOps team, sitting between development and operations with the goal of moving faster and automating releases into production. *This is an antipattern*. DevOps should soften the interface between different teams. DevOps goes deeper than deployment automation: it is a shift from ticket- and blame-driven operations with throw-it-over-the-wall releases TO one based on open sharing of information and skills, data-driven decision-making about architecture and design, and production availability and responsiveness. Isolating these ideas in a single team undermines the whole point.

Frequent releases with incremental functionality allow your company to outpace its competitors.

Blue/green deployment - machines are divided into two pools. One pool is active in production; the other pool gets the new deployment. That leaves time to test the new build before exposing it to customers. Once the new pool looks good, you shift production traffic over to it.

More code means it is harder to change. Large codebases are more likely to become overgeneralised.
A shared database means every change has a higher potential to disrupt. A big service accumulates complexity faster than the sum of two smaller services. It is easier to maintain and prune a bonsai juniper than a hundred-foot oak.

The key to making evolutionary architecture work is failure: you have to try different approaches to similar problems and kill the ones that are less successful.

Jeff Bezos said that every team should be sized no bigger than you can feed with two large pizzas. Important, but misleading: it is not just about having fewer people on a team. A self-sufficient two-pizza team also means each team member has to cover more than one discipline. You can't have a two-pizza team if you need a dedicated DBA, a frontend developer, an infra guru, a backend developer, an ML expert, a product manager, a GUI designer, and so on. The two-pizza team is about reducing external dependencies - a thousand dependencies will keep you from breaking free. It is really about having a small group that can be self-sufficient and push things all the way through to production.

No coordinated deployments - if you ever find that you need to update both the provider and the caller of a service interface at the same time, it is a warning sign that those services are strongly coupled.

An evolutionary architecture supports incremental, guided change as a first principle across multiple dimensions. Architecture styles:

- Microservices - very small, disposable units of code. Emphasise scalability and team-scale autonomy. Vulnerable to coupling with the platform for monitoring, tracing and continuous delivery.
- Microkernel and plugins - an in-process, in-memory message-passing core with formal interfaces to extensions. Good for incremental change in requirements and combining work from different teams. Vulnerable to coupling to the language and runtime environment.
- Event-based - prefers asynchronous messages for communication, avoiding direct calls. Good for temporal decoupling; allows new subscribers without changes to publishers; allows logic changes and reconstruction from history. Vulnerable to semantic change in message formats over time.

Microservice size: ideally no bigger than what fits in one developer's head. Don't pursue microservices just because the Silicon Valley unicorns are doing it - make sure they address a real problem you are likely to suffer. Otherwise, the operational overhead and debugging difficulty of microservices will outweigh the benefits.

Systems should exhibit loose clustering. In a loose cluster, the loss of an individual instance is no more significant than the fall of a single tree in a forest. The members of a cluster should not be configured to know the identities of the other members of the cluster.

Modular systems inherently have more options than monolithic ones. Five modular operators, borrowed from hardware:

1. Splitting - breaking things into modules, or a module into submodules. The key with splitting is that the interface to the original module is unchanged: before splitting, it handles the whole thing itself; afterward, it delegates work to the new modules but supports the same interface.
2. Substituting - replacing one module with another (like swapping an nVidia card for an AMD one). The original module and the substitute need to share a common interface.
3. Augmenting and Excluding - augmenting is adding a module to a system; excluding is removing one.
If you design your parent system to make augmenting and excluding first-class citizens, you will reach a different design.

4. Inversion - taking functionality that is distributed in several modules and raising it up higher in the system.
5. Porting - repurposing a module from a different system. Any time we use a service created by a different project or system, we are porting that service to our system. Porting risks adding coupling.

Information architecture is how we structure data. It is the data and the metadata we use to describe the things that matter to our systems - a set of related models that capture some facets of reality. Your job in building systems is to decide which facets of reality matter to your system, how you are going to represent them, and how that representation can survive over time.

Events can be used for:

- Notifications - fire and forget, a one-way announcement, no response expected
- Event-carried state transfer - an event that replicates entities or parts of entities so other systems can do their work
- Event sourcing - all changes are recorded as events that describe the change
- Command-query responsibility segregation - reading and writing with different structures. Not the same as events, but events are often found on the "command" side

Versioning can be a real challenge with events, especially once you have years' worth of them. Stay away from closed formats like serialised objects; look toward open formats like JSON or self-describing messages. Avoid frameworks that require code generation based on a schema. Treat messages like data instead of objects, and you are going to have a better time supporting very old formats.

Extract a "*policy proxy*" - questions of ownership and access control can be factored out of the service itself into a more centrally controlled location. Use URL dualism to support many databases, by using URLs as both the item identifier and a resolvable resource. Be careful: you should be able to verify that whatever you receive back is something you generated.

One of the basic enterprise architecture patterns is the "Single System of Record": any particular concept should originate in exactly one system, and that system is the enterprise-wide authority on entities within that concept. We need to be careful about exposing internal concepts to other systems - it creates semantic and operational coupling that hinders future change.

## Chapter 17: Chaos Engineering

Chaos engineering - the discipline of experimenting on a distributed system in order to build confidence in the system's capability to withstand turbulent conditions in production.

Staging or QA environments aren't much of a guide to the large-scale behaviour of systems in production. Congested networks behave in a qualitatively different way than uncongested ones; systems that work in a low-latency, low-loss network may break badly in a congested one.

A related paradox - the *Volkswagen microbus* - you learn how to fix the things that often break, but you don't learn how to fix the things that rarely break. That means when they do break, the situation is likely to be more dire. We want a continuous low level of breakage to make sure our system can handle the big things. We use chaos engineering the way a weightlifter uses iron: to create tolerable levels of stress and breakage to increase the strength of the system over time.

At Netflix, chaos is an opt-out process.
That means every service in production will be subject to Chaos Monkey. Other companies adopting chaos engineering have chosen an opt-in approach. When you are adding chaos engineering to an organisation, consider starting with opt-in. You must be able to break the system without breaking the bank - if that is not the case, chaos engineering is not for you.

> If you have a wall full of green dashboards, that means your monitoring tools aren't good enough. There is always
> something weird going on.

Make sure you have a recovery plan. The system may not automatically return to a healthy state when you turn off the chaos - you need to know what to restart, disconnect or clean up.

Chaos Monkey does one kind of injection - it kills instances randomly. There are other types of monkeys: Latency Monkey, Janitor Monkey, Chaos Kong, ... Killing instances is the most basic and crude kind of injection, but it will absolutely find weaknesses in your system. Netflix also uses failure injection testing (FIT). FIT can tag a request at the inbound edge with a cookie that says, for example, "down the line, this request is going to fail when service G calls service H". Netflix uses a common framework for all its outbound service calls, so it has a way to propagate this cookie and treat it uniformly.

High-reliability organisations use drills and simulations to find the same kind of systematic weaknesses on the human side as on the software side. You can make this more fun by calling it a "*zombie apocalypse simulation*": randomly select 50% of your people and tell them they are zombies for the rest of the day. After the simulation, review the issues.

================================================
FILE: books/system-design-interview.md
================================================
[go back](https://github.com/pkardas/learning)

# System Design Interview

Book by Alex Xu & Sahn Lam

- [1. Proximity Service](#1-proximity-service)

## 1. Proximity Service

================================================
FILE: books/tidy-first.md
================================================
[go back](https://github.com/pkardas/learning)

# Tidy First?

Book by Kent Beck

- [1. Guard Classes](#1-guard-classes)
- [2. Dead code](#2-dead-code)
- [3. Normalize symmetries](#3-normalize-symmetries)
- [4. New Interface, Old implementation](#4-new-interface-old-implementation)
- [5. Reading Order](#5-reading-order)
- [6. Cohesion Order](#6-cohesion-order)
- [7. Move Declaration and Initialization Together](#7-move-declaration-and-initialization-together)
- [8. Explaining variables](#8-explaining-variables)
- [9. Explaining constants](#9-explaining-constants)
- [10. Explicit parameters](#10-explicit-parameters)
- [11. Chunk statements](#11-chunk-statements)
- [12. Extract helper](#12-extract-helper)
- [13. One pile](#13-one-pile)
- [14. Explaining comments](#14-explaining-comments)
- [15. Delete redundant comments](#15-delete-redundant-comments)
- [16. Separate Tidying](#16-separate-tidying)
- [17. Chaining](#17-chaining)
- [18. Batch Sizes](#18-batch-sizes)
- [19. Rhythm](#19-rhythm)
- [20. Getting Untangled](#20-getting-untangled)
- [21. First, After, Later, Never](#21-first-after-later-never)
- [22. Beneficially Relating Elements](#22-beneficially-relating-elements)
- [23. Structure and behavior](#23-structure-and-behavior)
- [24. Economics: Time Value and Optionality](#24-economics-time-value-and-optionality)

## 1. Guard Classes

If you see code like:

```
if condition:
    ...
```

or

```
if condition:
    if another condition:
        ...
```
tidy the above to:

```
if not condition:
    return
if not another condition:
    return
...
```

Exit immediately - it is easier to read: before we get into the details, there are some preconditions we need to bear in mind. https://github.com/Bogdanp/dramatiq/pull/470

## 2. Dead code

Delete it. If you need it later, use version control. Delete only a little code in each tidying diff - just in case it turns out you were wrong, it will be easy to revert the change.

## 3. Normalize symmetries

Tidy away unnecessary variation. Use a common style for your functions. Things get confusing when two or more patterns are used interchangeably.

## 4. New Interface, Old implementation

If some interface you need to use is very difficult to use, implement the interface you wish you could call, and call it. Implement the new interface by simply calling the old one.

## 5. Reading Order

Reorder the code in the file into the order in which a reader would prefer to encounter it.

## 6. Cohesion Order

If 2 functions are coupled, put them next to each other; if 2 files are coupled, put them in the same directory, ... If you know how to eliminate the coupling, go for it.

## 7. Move Declaration and Initialization Together

It is easier to understand the code if each variable is declared and initialized just before it's used. It is hard to read when declaration is separated from initialization.

## 8. Explaining variables

When you understand a part of a big, hairy expression, extract the subexpression into a variable named after the intention of the expression. Always separate the tidying commit from the behaviour change commit.

## 9. Explaining constants

Create a symbolic constant. Replace uses of the literal constant with the symbol.

## 10. Explicit parameters

It's common to see blocks of parameters passed in a map. This makes it hard to read and understand what data is required. Make the parameters explicit:

```
foo(params) -> foo(a, b)
```

## 11. Chunk statements

The simplest tidying: put a blank line between 2 parts doing different things. After you've chunked statements, you have many paths forward: Explaining Variables, Extract Helper or Explaining Comments.

## 12. Extract helper

A block of code that has an obvious purpose and limited interaction with the rest of the code can be extracted into a helper function. Using the helper elsewhere can be taken care of in another tidying.

## 13. One pile

Sometimes you read code that has been split into many tiny pieces, which makes it hard to understand. The biggest cost of code is the cost of reading it, not the cost of writing it. Sometimes, in order to regain clarity, the code must be merged together first, so that new, easier-to-understand parts can be extracted.

## 14. Explaining comments

Write down only what isn't obvious from the code. Put yourself in the place of the future reader, or yourself 15 minutes ago. Immediately upon finding a defect is a good time to comment - it is much better to add a comment that points out the issue than to leave it buried in the sand.

## 15. Delete redundant comments

When you see a comment that says exactly what the code says, remove it.

## 16. Separate Tidying

Tidyings should go into their own separate PRs, with as few tidyings per PR as possible. Behavior and structure changes should be in separate PRs.

## 17. Chaining

One tidying can set up another. You will begin to flow tidyings together to achieve larger changes to the structure of your code. Be wary of changing too much, too fast.
A failed tidying is expensive relative to the cost of a series of successful tidyings.

## 18. Batch Sizes

The more tidyings per batch, the longer the delay before integrating, and the greater the chance that a tidying collides with something someone else is doing. The chance of a batch accidentally changing behavior rises with the number of tidyings in the batch. The more tidyings per batch, the more we are prone to tidying "just because", with all the additional costs that creates. In many orgs, the fixed cost of getting a single change through review and deployment is substantial. Programmers feel this cost, so they move right in the trade-off space (despite collisions, interactions, ...).

## 19. Rhythm

More than an hour of tidying at a time before making a behavioral change likely means you've lost track of the minimum set of structure changes needed to enable your desired behavior change. Tidying is a minutes-to-an-hour kind of activity. Sometimes it may take longer, but not for long.

## 20. Getting Untangled

Tidying leads to more and more tidying. What to do? 3 options:

1. Ship as it is [very impolite, prone to errors, but quick]
2. Untangle the tidyings into separate PRs [more polite, but may require a lot of work]
3. Start over, tidying first [more work, but leaves a coherent chain of commits]

Re-implementation raises the possibility that you will see something new as you re-implement, letting you squeeze more value out of the same set of behavioral changes.

## 21. First, After, Later, Never

**Never**

- you are never changing this code again
- there is nothing to learn by improving the design

**Later**

- you have a big batch of tidying to do without immediate payoff
- there is eventual payoff for completing the tidying
- you can tidy in little batches

**After**

- waiting until next time to tidy first will be more expensive
- you won't feel a sense of completion if you don't tidy after

**First**

- it will pay off immediately, either in improved comprehension or in cheaper behavior changes
- you know what to tidy and how

## 22. Beneficially Relating Elements

Software design is beneficially relating elements. Elements: Tokens -> Expressions -> Statements -> Functions -> Objects/modules -> Systems. Elements have boundaries. In software design we have a handful of relations, like: invokes, publishes, listens, refers. Software designers can only:

- Create and delete elements
- Create and delete relationships
- Increase the benefit of a relationship

```
caller()
    return box.width() + box.height()
```

This function has 2 relationships with the box. The relationship can be adjusted - we can have `box.area()`:

```
caller()
    return box.area()
```

The benefit is that the caller is simpler; the cost is that `box` has an additional method.

## 23. Structure and behavior

Software creates value in two ways:

- what it does today
- the possibility of new things we can make it do tomorrow

Behavior creates value. Rather than having to calculate a bunch of numbers by hand, the computer can calculate millions of them every second. If running the software costs $1 and you can charge folks $10 to run it on their behalf, then you have a business. Structure creates options: the structure could make it easy to add new features to our system, or it could make it hard.

## 24. Economics: Time Value and Optionality
## 24. Economics: Time Value and Optionality

- A dollar today is worth more than a dollar tomorrow, so earn sooner and spend later:
  - you can't spend a future dollar yet, so for now it's worthless
  - you can't invest it
  - there's some chance that you won't get the dollar
  - in the scope of this book: the time value of money encourages tidy after over tidy first
- In a chaotic situation, options are better than things, so create options in the face of uncertainty

Software design has to reconcile the imperatives of "earn sooner/spend later" and "create options, not things".

================================================
FILE: books/understanding-distributed-systems.md
================================================

[go back](https://github.com/pkardas/learning)

# Understanding Distributed Systems: What every developer should know about large distributed applications

Book by Roberto Vitillo

================================================
FILE: case-studies/reddit.md
================================================

[go back](https://github.com/pkardas/learning)

# How Reddit mastered managing growth

*Presentation by Greg Taylor*

330M monthly active users. 8th most popular website in the world. 12M posts per month. 2B votes per month.

Reddit in 2016 - a small engineering team with a monolith application. The Infrastructure team was responsible for provisioning and configuring all infrastructure, operating most of the systems and handling non-trivial debugging. Static infrastructure. This approach worked for more than a decade.

In 2016 the team started growing rapidly. But the monolith was so fragile that every deploy was an adventure - a blocker for the organisation. How to make everyone's life easier? How to onboard new employees? Reddit decided to pursue SOA - Service-Oriented Architecture. This gave better separation of concerns between teams. However, if you have a monolith and it works well for you: "go home, give it a hug, tell it you love it, warts and all".

Growing pains: Automated tests - they started using CI, the master branch always had to be green.

Growing pains: Something to build on - instead of copying and pasting services from one another, they needed a service framework to base services on. Services are configured in the same way, they expose a similar set of ports, they have the same async event loop, they fetch secrets the same way, ... - baseplate.readthedocs.io

Growing pains: Artisanal infrastructure - they had hand-crafted infrastructure and switched to Terraform (infrastructure as code) - reusable modules - really valuable. Pulling existing infrastructure into Terraform was painful.

Growing pains: Staging/integration woes - their approach to staging was inappropriate for SOA, so they started using Kubernetes.

Growing pains: Infra team as a bottleneck - everything depended on the infrastructure team, so they gave developers more freedom to modify Terraform. Not all teams want to operate the full stack for their service.

Service ownership, the service owner is empowered to:

- Dev and test their service in a prod-like env
- Do most of the work to get to production
- Own the health of their service
- Diagnose issues

Service ownership comes with some challenges: you need to train developers, and mistakes are still going to happen. Mistakes are learning opportunities.

How to build infrastructure as a product? Service owners - learn some Kubernetes basics, deploy and operate their own services. Reddit Infrastructure - keep the Kubernetes cluster running, provision AWS resources, support and advise service owners.
Instead of learning the entire stack, engineers had to learn only one technology - Kubernetes. If a developer needs e.g. S3, an infra engineer is responsible for providing it.

Batteries included - engineers do not have to worry about logging, secrets, security, ... - everything is out of the box.

Extensive documentation and training for developers. Without it, you don't have a product, you have a pile of technology.

> An engineer should not require deep infra experience in order to be productive.

Preventing damage: resource limits, throttling, network policy, access controls, scanning for common mistakes, docker image policies

================================================
FILE: conferences/aws-innovate-ai-ml-21.md
================================================

[go back](https://github.com/pkardas/learning)

# AWS Innovate: AI/ML Edition 2021

- [Move and scale your ML experiments in the cloud](#move-and-scale-your-ml-experiments-in-the-cloud)
- [Detect potential bias in your datasets and explain how your models predict](#detect-potential-bias-in-your-datasets-and-explain-how-your-models-predict)
- [Deploy state-of-the-art ML models and solutions in a single click](#deploy-state-of-the-art-ml-models-and-solutions-in-a-single-click)

The online conference took place on 24.02.2021, I participated in a couple of talks.

## Move and scale your ML experiments in the cloud

Machine learning experiments (labeling the data, storage, sharing, saving, tuning parameters) can be done in the Amazon SageMaker IDE - a secure, scalable, compliant, DevOps-ready solution.

**How to start?** We usually start with local notebooks, which are not powerful enough. You could move your Jupyter Notebook to the cloud (doing it on your own - a lot of maintenance), but we can do better.

DEMO:

1. Just go to the SageMaker page on AWS
2. Open SageMaker Studio (limitation: one instance per region)
3. We are going through the Standard setup:
   1. Authentication method selection (SSO or IAM)
   2. Permissions: which resources it can access - e.g. storage, by default SageMaker has access to any bucket with "sagemaker" in the name
   3. You can make your notebook shareable
   4. Network and storage definitions - VPC or Public Internet, security groups, encryption
   5. You can add your tags to identify resources
4. Setup will take a few minutes

You can open the application. This is literally JupyterLab. You can, for example, clone a GitHub repo there and run the notebooks (it has git integration, so switching between branches is easy). You can easily switch machines, the largest: 488GB of RAM!

![aws-innovate-ai-ml-21-1](../_images/aws-innovate-ai-ml-21-1.png)

Example training:

![aws-innovate-ai-ml-21-2](../_images/aws-innovate-ai-ml-21-2.png)

SageMaker is not just a notebook - it allows for data preparation, building models, training, tuning and deployment.

## Detect potential bias in your datasets and explain how your models predict

Bias - an unfair representation of reality. As we use datasets, there is a risk that the data we use does not represent reality.

Explainability - with complex models (e.g. deep learning) it is hard to understand why the model came up with a prediction. We need to know why the model came up with a certain decision, e.g. in medicine, or because of legal obligations.

**How to solve these issues?**

We used a dataset with the following columns: age, sex, skin colour, ... Zooming in on sex: 1/3 female, 2/3 male - imbalanced. Zooming in even more: a 1:7 female-to-male ratio among earners with >50k USD. The model can be biased towards the overrepresented group.
So the first approach is to visualise the data to detect bias. But AWS has something better.

**Analysis using Amazon SageMaker Clarify**

Bias analysis: pre-training analysis and post-training analysis. We define the "potentially" biased group: `facet_name="Sex"`. Results are displayed in nice charts (many awesome metrics):

![aws-innovate-ai-ml-21-3](../_images/aws-innovate-ai-ml-21-3.png)

It also outputs a report in HTML and as a Jupyter Notebook.

**Explainability** - it uses SHAP 🎉 https://github.com/slundberg/shap

For explainability AWS outputs a similar report:

![aws-innovate-ai-ml-21-4](../_images/aws-innovate-ai-ml-21-4.png)

## Deploy state-of-the-art ML models and solutions in a single click

SageMaker Studio. Problem: text analysis - there are 60 models prepared for text analysis. We can select one, e.g. trained on Wikipedia. Then we can deploy the model, and we can fine-tune it - we need to provide the dataset in a special format. The model has an endpoint, which can be tested in the Jupyter Notebook.

![aws-innovate-ai-ml-21-5](../_images/aws-innovate-ai-ml-21-5.png)

We have a notebook, but we cannot give it to the Product Managers, which is why we can integrate it with, for example, a UI. There are libraries for the integration with JavaScript. Example: a banana slicer review from Amazon:

![aws-innovate-ai-ml-21-6](../_images/aws-innovate-ai-ml-21-6.png)

New data flow - a tool for preparing new data. Then you can pass the data to the model to train.

**Remember to shut down the endpoint because you pay for it $$$.**

================================================
FILE: conferences/brown-bags.md
================================================

[go back](https://github.com/pkardas/learning)

- [NLP - State of the Art](#nlp---state-of-the-art)
- [Kanban Training](#kanban-training)

## NLP - State of the Art

*By Michał Jakóbczyk*

Turing Test - are you able to distinguish whether you are talking to a computer or a person? It determined the direction of development of NLP.

> The Man Who Mistook His Wife For a Hat - Oliver Sacks - book recommendation.

Analyse a sentence:

```python
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")  # any trained pipeline will do
displacy.render(nlp("Some sentence"))
```

"They ate the pizza with anchovies" - context matters (pizza topped with fish, or eaten using fish?).

"They ate the pizza with hands"

"I shot an elephant in my pyjamas" - the model will attach the pyjamas to the elephant.

"I shot an elephant, in my pyjamas" - the model will attach the pyjamas to the person.

We know about these differences! Models have difficulties.

40-50 years ago NLP was mostly about POS tag analysis, recently it is more about machine learning.

Python code -> Assembler <- Machine learning model. In the end everything is Assembly.

*playground.tensorflow.org* - 1 square = 1 neuron that is basically checking one if / one line.

Text to numbers:

- document vectorisation - 1 if the document contains the word, 0 otherwise
- one-hot encoding - you can use it for encoding word position (2D matrix) - a lot of memory
- word embeddings - place the word in a multidimensional space - adding vectors, drawing a multidimensional sphere containing multiple words - *projector.tensorflow.org*

We can compare sentences using embeddings.

```python
nlp("Gave a research talk in Boston").similarity(nlp("Had a science lecture in Seattle"))
```

Training is done using input text: words are removed one by one and the machine is supposed to guess the missing word.

GPT-3 - the biggest transformer, almost $5M spent on training this model
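As a minimal illustration of what such a `similarity` call computes under the hood - cosine similarity between embedding vectors (plain Python; the toy 3-dimensional vectors are made up, real embeddings have hundreds of dimensions):

```python
import math

def cosine_similarity(a: list[float], b: list[float]) -> float:
    # Dot product of the vectors, normalised by their lengths:
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

# Hypothetical word vectors:
king, queen, pizza = [0.9, 0.8, 0.1], [0.85, 0.82, 0.12], [0.1, 0.2, 0.95]

print(cosine_similarity(king, queen))  # close to 1.0 - related words
print(cosine_similarity(king, pizza))  # much lower - unrelated words
```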
## Kanban Training

*By Marcin Lelek*

https://tools.kaiten.io/featureban

KANBAN - card + signal, the name of the board, a method for implementing improvements requested by the client. Created by Toyota.

3 rules:

- start with what you do now
- gain agreement to evolutionary change (don't make changes against people, agree on change)
- encourage acts of leadership at all levels (independent teams)

General practices:

- you need to have a board to visualise progress
- the number of items in Work In Progress is limited
- manage flow - work flow management, not people optimisation
- make policies explicit - define a policy for how to treat a card in a column, e.g. when a card moves from one column to another
- implement feedback loops
- improve collaboratively - evolve experimentally

Different levels of Kanban boards - e.g. 1 WIP per person.

================================================
FILE: conferences/pycon-2022.md
================================================

[go back](https://github.com/pkardas/learning)

- [[EN] Don’t use a lot where a little will do. A story of programming tricks you wish you invented](#en-dont-use-a-lot-where-a-little-will-do-a-story-of-programming-tricks-you-wish-you-invented)
- [[EN] Effective data science teams with databooks](#en-effective-data-science-teams-with-databooks)
- [[PL] Poetry - poezja pythonowych pakietów](#pl-poetry---poezja-pythonowych-pakietw)
- [[EN] Interfaces in Python. The benefits and harms](#en-interfaces-in-python-the-benefits-and-harms)
- [[EN] Observability in backends with Python and OpenTelemetry](#en-observability-in-backends-with-python-and-opentelemetry)
- [[EN] Hitchhiker's guide to typing](#en-hitchhikers-guide-to-typing)
- [[EN] Lightning talks](#en-lightning-talks)
- [[PL] Dzielenie monolitu w praktyce](#pl-dzielenie-monolitu-w-praktyce)
- [[EN] pytest on steroids](#en-pytest-on-steroids)
- [[EN] Music information retrieval with Python](#en-music-information-retrieval-with-python)

## [EN] Don’t use a lot where a little will do. A story of programming tricks you wish you invented

Regex has a debug mode - `re.DEBUG`

Python has a built-in HTTP server capable of serving static files from the current directory.

## [EN] Effective data science teams with databooks

`databooks` - a tool for dealing with notebooks (automatic conflict resolution, metadata stripping, pre-commit hooks, printing a notebook in a terminal, pretty-printing git diffs)

Architecture as Code (AaC) with Python, or a way to become your own boss: prototyping and visualization of system architecture using code. `diagrams` - a library for creating diagrams from Python

## [PL] Poetry - poezja pythonowych pakietów

A tool for managing dependencies, as well as for creating and publishing packages. It can replace `pip` or `virtualenv`.

Semantic versioning: `major.minor.patch`

## [EN] Interfaces in Python. The benefits and harms

Abstract classes in Python - ABC

Sequence - any collection implementing 2 methods (length and getter)

Dependency Injection - passing dependencies directly to, for example, the init method.

## [EN] Observability in backends with Python and OpenTelemetry

Trace - a JSON object, can travel between services. Simple types (int, bool, lists, ...)

Auto-instrumentation - install a couple of libraries, run the command, done.

Manual instrumentation - via a context manager or a decorator inside the code.

Distributed tracing with queues - the context of the trace becomes part of the message that you enqueue.

Jaeger - one of the tools compatible with OpenTelemetry.

uvicorn has a separate thread for OpenTelemetry data.
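A minimal manual-instrumentation sketch (assuming the `opentelemetry-sdk` package is installed; the console exporter and the span/attribute names are just for demonstration):

```python
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor

# Wire up a provider that prints finished spans to the console:
provider = TracerProvider()
provider.add_span_processor(SimpleSpanProcessor(ConsoleSpanExporter()))
trace.set_tracer_provider(provider)

tracer = trace.get_tracer(__name__)

# Manual instrumentation via a context manager:
with tracer.start_as_current_span("fetch-user") as span:
    span.set_attribute("user.id", 42)
    ...  # the code being traced
```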
## [EN] Hitchhiker's guide to typing

urllib3 case study: https://sethmlarson.dev/blog/tests-arent-enough-case-study-after-adding-types-to-urllib3

## [EN] Lightning talks

GitHub Actions are capable of running cron jobs.

Idea: when learning a new language, rewrite an existing command line tool in the selected language.

## [PL] Dzielenie monolitu w praktyce

Success criteria for extracting a microservice: see results quickly, be able to back out at any moment, test the system with production traffic, BUT without breaking production, be able to return to the old solution, and change as little as possible in the monolith.

Transforming the monolith into a microservice: extract an interface in the monolith, create a microservice with an identical interface, add a new implementation in the monolith that uses the new service. When a request comes in, we can send it to both places; the final response should come from the old system. After a testing period, we switch to the new solution.

![pycon-2022-monolith](../_images/pycon-2022-monolith.jpeg)

Strangler Pattern - named after a plant that parasitises a tree, using the tree to grow upwards and then killing it.

Operating in Shadow Mode - extract the microservice, collect requests and results.

## [EN] pytest on steroids

Everything in pytest is a plugin. When you create a fixture, you create a local plugin.

## [EN] Music information retrieval with Python

`pedalboard` by Spotify - a Python library for audio effects

`Pyo` - an audio synthesis engine, effects control, implementing loopers, used in live music

![pycon-2022-apis](../_images/pycon-2022-apis.jpeg)

`ISMIR dataset` - various datasets with music, lyrics, ...

`mirdata` - a Python wrapper for ISMIR datasets

`Librosa` - a library for music analysis

In general, there are plenty of tools for music analysis, which can then be used to train ML models.

![pycon-2022-music-tagging](../_images/pycon-2022-music-tagging.jpeg)

![pycon-2022-source-separation](../_images/pycon-2022-source-separation.jpeg)

![pycon-2022-source-separation-1](../_images/pycon-2022-source-separation-1.jpeg)

![pycon-2022-transcription](../_images/pycon-2022-transcription.jpeg)

Music recommendations: very complex, massive business and cultural impact:

![pycon-2022-music-recommendations](../_images/pycon-2022-music-recommendations.jpeg)

Generating music - neural audio synthesis or symbolic composition (which then needs to be played by a human).

Links:

- https://openai.com/blog/jukebox/
- https://youtu.be/bXBliLjImio
- https://youtu.be/MwtVkPKx3RA
- https://youtu.be/tgq1YTQ2c0s
- https://magenta.tensorflow.org

================================================
FILE: courses/fast-ai.md
================================================

[go back](https://github.com/pkardas/learning)

# Practical Deep Learning for Coders

Course -> https://course.fast.ai/

[TOC]

## Lesson 1

Truths to know before starting with Deep Learning:

- high school math is sufficient
- there is no need for enormous amounts of data
- no need for expensive hardware for basic usage

1961 - the first machine built on top of a mathematical model from 1943. Heavily criticised by Minsky - he showed that a single-layer artificial neural network could not learn a simple XOR. The global academic community gave up on neural networks. 1986 - MIT released a paper defining requirements for building and using neural networks. Later, researchers proved that adding additional layers of neural networks is enough to approximate any mathematical model.
But in fact these models were too slow and too big to be useful.

**What is ML?** Like regular programming, a way to get computers to complete a specific task. Instead of telling the computer the exact steps to solve a problem, show it examples of the problem to solve and let it figure out how to solve it itself.

*Neural network* - a parametrised function that can solve any problem to any level of accuracy (in theory - the *universal approximation theorem*).

What does it mean to train a neural network? It means finding good weights. The procedure for finding them is **SGD** - Stochastic Gradient Descent.

Neural Networks work using patterns, need labeled data and create PREDICTIONS, not recommended actions. You need to be super careful about the input data - initial bias (stereotypical data) will produce biased results. E.g. marijuana consumption is equal among white and black people, but black people are more often arrested for marijuana possession. Biased input data will produce biased predictions, e.g. sending more police officers to black neighbourhoods.

Segmentation - marking areas on images (trees, cars, ...)

## Lesson 2

When you want to predict a category, you are facing a classification problem. Whenever you want to predict a number, you are dealing with a regression problem.

```python
learn = cnn_learner(data, architecture, metric)
```

Architecture - e.g. *resnet34, resnet50* - the name of the architecture (the number is the number of layers) - the function that we are optimising.

Epoch - looking at every image in the training set once = 1 epoch, 1 loop

Metric - a function measuring the quality of the model's predictions (*error_rate, accuracy*) - we care about it. Loss != Metric; loss is what the computer uses to update the parameters - the computer cares about it. For example, tweaking parameters just a little might not change accuracy or error rate.

The model might cheat - "I have seen this image, this is a cat" - we don't want the model to memorise images. That is why we need a split into training and validation sets. For validating time series, you should not remove a random e.g. 20% of the data; instead, drop off the end and let the model predict e.g. the next 2 weeks.

*Transfer learning* - using a pretrained model for a task different to what it was originally trained for. Take the pretrained weights as initial weights, add more epochs on your specific dataset, and you will end up with a much better model.

*Fine tuning* - a transfer learning technique where the weights of a pretrained model are updated by training for additional epochs, using a task different to that used for pretraining. You can take advantage of pretrained features - e.g. dog faces, patterns, etc.

Computer Vision can be used for a variety of problems, e.g. sound or virus analysis (data transformed into images).

![fast-ai-1](../_images/fast-ai-1.png)

Set of pretrained models: https://modelzoo.co/

*How to decide if there is a relationship?* *Null hypothesis* - e.g. "no relationship between X and Y" -> gather data -> how often do we see a relationship?

*P-Value* - the probability of an observed result assuming that the null hypothesis is true.

## Lesson 3

Square images are easier to process, you need to remember the length of only one dimension. `Squishing` is the most efficient method for resizing, because cropping removes information and adding black bars wastes computation. Another common method is `Random Resize Crop` - over a few batches, different parts of the image are taken.

ImageClassifierCleaner - a utility tool (GUI) for finding the examples the classifier is least confident about. You can manually improve the labelling.
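A usage sketch following the pattern from the fastai docs/book; `learn` (the trained Learner) and `path` (the dataset root) are assumed to come from earlier training code:

```python
import shutil

from fastai.vision.all import *
from fastai.vision.widgets import ImageClassifierCleaner

cleaner = ImageClassifierCleaner(learn)
cleaner  # renders the review widget in the notebook

# After reviewing images in the widget, apply the decisions:
for idx in cleaner.delete():
    cleaner.fns[idx].unlink()  # delete mislabelled or broken images
for idx, cat in cleaner.change():
    shutil.move(str(cleaner.fns[idx]), path/cat)  # move to the corrected class folder
```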
`VBox` - you can group multiple widgets together and create a prototype application in a notebook.

`voila` - a plugin for hiding cells with code, so only inputs and outputs are visible. Add `voila` to the URL, and it will display an application-like website in the browser. Great for prototyping.

mybinder.org - you can turn a notebook from GitHub into a publicly available web application.

*Healthy skin* example - Bing returns images of a young white woman - bias!

Book recommendation: *Building Machine Learning Powered Applications*

Feedback loop - e.g. predictive policing - a system that sends police - feedback loops can result in negative implications of that bias getting worse and worse - e.g. you send police to the same place over and over.

FastPages - dump a notebook into a page.

Recognising handwritten digits (MNIST) was considered a challenging problem ~20 years ago. Baseline idea: compare a model / ideal number with the input - for MNIST, calculate the average of the training set, and on the validation set calculate the distance (~95% accuracy). A baseline should be something simple to implement - then you build something on top of it.

Broadcasting - if the shapes of 2 elements don't match, e.g. A (1010, 28, 28) - B (28, 28), B will be subtracted from each of the 1010 items of A.

PyTorch has an engine for calculating derivatives. In PyTorch, `_` at the end of a method name means "in-place method".

Learning rate - the size of a step in gradient descent

## Plant Pathology

https://www.kaggle.com/c/plant-pathology-2021-fgvc8/overview

```python
import csv

from fastai.vision.all import *
from fastai.metrics import error_rate, accuracy

path = Path("/kaggle/input/plant-pathology-2021-fgvc8")

# Prepare data, labels are stored separately:
with open(path / "train.csv", mode='r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    train_labels = {
        row["image"]: row["labels"] for row in csv_reader
    }

# Function used for labeling images:
def label_func(file_path: Path) -> str:
    return train_labels[str(file_path).split('/')[-1]]

# Read data:
data_block = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=label_func,
    item_tfms=Resize(224)
)
# DataBlock to DataLoader:
data_loaders = data_block.dataloaders(path / "train_images")
# Available classes:
data_loaders.vocab
# Few example images:
data_loaders.show_batch()

# ResNet34 architecture for image classification:
learner = cnn_learner(data_loaders, models.resnet34, metrics=error_rate)
# 4 epochs, unfortunately one epoch takes ~1h, most probably because of incorrect use of 'item_tfms' in DataBlock, which disables GPU usage:
learner.fine_tune(4)

# Model validation, this model achieved 0.62 error_rate:
interpretation = ClassificationInterpretation.from_learner(learner)
interpretation.plot_confusion_matrix()
interpretation.plot_top_losses(5, nrows=1, figsize=(25, 5))

# Saving model:
learner.export()
```

================================================
FILE: patterns/abbreviations.md
================================================

[go back](https://github.com/pkardas/learning)

# Abbreviations

- [SOLID](#solid)
- [DRY - Don't Repeat Yourself](#dry---dont-repeat-yourself)
- [KISS - Keep It Simple, Stupid](#kiss---keep-it-simple-stupid)
- [ACID](#acid)
- [BASE](#base)
- [CAP](#cap)
- [NF](#nf)

## SOLID

### SRP - Single Responsibility Principle

A class should have only one reason to change, so in order to reduce the reasons for modification, one class should have one responsibility. It is a bad practice to create classes doing everything.

Why is it so important that a class has only one reason to change? If a class has more than one responsibility, the responsibilities become coupled, and this might lead to surprising consequences, like one change breaking another functionality.

You can avoid these problems by asking a simple question before you make any changes: What is the responsibility of your class / component / micro-service? If your answer includes the word “and”, you’re most likely breaking the single responsibility principle.
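A small illustration (hypothetical `Report` class, not from any of the sources here) - one class that both formats and persists reports has two reasons to change; splitting it leaves each class with one:

```python
# Violation: the class formats reports AND persists them - two reasons to change.
class Report:
    def __init__(self, body: str) -> None:
        self.body = body

    def to_html(self) -> str:
        return f"<html><body>{self.body}</body></html>"

    def save(self, filename: str) -> None:  # persistence concern mixed in
        with open(filename, "w") as file:
            file.write(self.to_html())

# Fix: each class has a single responsibility.
class HtmlFormatter:
    def format(self, report: Report) -> str:
        return f"<html><body>{report.body}</body></html>"

class ReportWriter:
    def save(self, content: str, filename: str) -> None:
        with open(filename, "w") as file:
            file.write(content)
```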
### OCP - Open-Closed Principle

Classes, modules, functions, etc. should be open to extension but closed to modification. Code should be extensible and adaptable to new requirements. In other words, we should be able to add new system functionality without having to modify the existing code. We should add functionality only by writing new code.

If we want to add a new thing to the application and we have to modify the "old", existing code to achieve this, it is quite likely that the code was not written in the best way. Ideally, new behaviors are simply added.

### LSP - Liskov Substitution Principle

This rule deals with the correct use of inheritance and states that wherever we pass an object of a base class, we should be able to pass an object of a class inheriting from that class.

Example of violation:

```python
class A:
    def foo(self) -> str:
        return "foo"

class B(A):
    def foo(self, bar: str) -> str:  # changed signature breaks substitutability
        return f"foo {bar}"
```

B's `foo` does not take the same arguments, meaning A and B are not compatible: B can not be used wherever A is expected.

### ISP - Interface Segregation Principle

Clients should not be forced to depend upon interfaces that they do not use. ISP splits interfaces that are very large into smaller and more specific ones, so that clients only have to know about the methods that are of interest to them.

Example of violation:

```python
class Shape:
    def area(self) -> float:
        raise NotImplementedError

    def volume(self) -> float:
        raise NotImplementedError
```

A 2D triangle does not have a volume, hence it would need to implement a method it does not need. To solve this, there should be multiple interfaces: Shape and 3DShape.

### DIP - Dependency Inversion Principle

High-level modules, which provide complex logic, should be easily reusable and unaffected by changes in low-level modules, which provide utility features. To achieve that, you need to introduce an abstraction that decouples the high-level and low-level modules from each other.

> Entities must depend on abstractions, not on concretions. It states that the high-level module must not depend on the
> low-level module, but they should depend on abstractions.

For example, a password reminder should not have knowledge about the database provider (low-level information).
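A sketch of the password-reminder example mentioned above (hypothetical class names):

```python
from abc import ABC, abstractmethod

# The abstraction both levels depend on:
class Connection(ABC):
    @abstractmethod
    def fetch_user_email(self, user_id: int) -> str: ...

# Low-level detail - can be swapped without touching PasswordReminder:
class PostgresConnection(Connection):
    def fetch_user_email(self, user_id: int) -> str:
        return "user@example.com"  # a real implementation would query the database

# High-level module depends only on the abstraction:
class PasswordReminder:
    def __init__(self, connection: Connection) -> None:
        self.connection = connection

    def remind(self, user_id: int) -> str:
        return f"Reminder sent to {self.connection.fetch_user_email(user_id)}"

print(PasswordReminder(PostgresConnection()).remind(user_id=1))
```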
## DRY - Don't Repeat Yourself

"Every piece of knowledge must have a single, unambiguous, authoritative representation within a system."

When the DRY principle is applied successfully, a modification of any single element of a system does not require a change in other logically unrelated elements.

## KISS - Keep It Simple, Stupid

The KISS principle states that most systems work best if they are kept simple rather than made complicated; therefore, simplicity should be a key goal in design, and unnecessary complexity should be avoided.

## ACID

### Atomicity

Each transaction is either properly carried out or the process halts and the database reverts back to the state before the transaction started. This ensures that all data in the database is valid.

### Consistency

A processed transaction will never endanger the structural integrity of the database. The database is always in a consistent state.

### Isolation

Transactions cannot compromise the integrity of other transactions by interacting with them while they are still in progress.

### Durability

The data related to the completed transaction will persist even in the case of network or power outages. If a transaction fails, it will not impact the manipulated data.

## BASE

### Basically Available

Ensure availability of data by spreading and replicating it across the nodes of the database cluster - this is not done immediately.

### Soft State

Due to the lack of immediate consistency, data values may change over time. The state of the system could change over time, so even during times without input there may be changes going on due to 'eventual consistency', thus the state of the system is always 'soft'.

### Eventually Consistent

The system will *eventually* become consistent once it stops receiving input. The data will propagate to everywhere it should sooner or later, but the system will continue to receive input and is not checking the consistency of every transaction before it moves on to the next one.

## CAP

In theoretical computer science, the CAP theorem states that it is impossible for a distributed data store to simultaneously provide more than two out of the following three guarantees:

### Consistency

Every read receives the most recent write or an error. Does the system reliably follow its established rules? Do all nodes within a cluster see all the data they are supposed to? (Similar in spirit to consistency in ACID, although not identical.)

### Availability

Every request receives a (non-error) response, without the guarantee that it contains the most recent write. Is the given service or system available when requested? Does each request get a response outside of failure or success?

### Partition Tolerance

The system continues to operate even when the network between nodes fails and messages are lost or delayed. A single node failure should not cause the entire system to collapse.

## NF

Database normalisation is the process of structuring a database, usually a relational database, in accordance with a series of so-called normal forms in order to reduce data redundancy and improve data integrity.

### 1NF

To satisfy 1NF, the values in each column of a table must be atomic.

### 2NF

Must be in 1NF + every non-key column must depend on the whole primary key, not just a part of a composite key.

### 3NF

Must be in 2NF + no transitive functional dependencies. Transitive Functional Dependency - when changing a non-key column might cause any of the other non-key columns to change.

For example:

![3nf-violation](../_images/3nf-violation.png)
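To make the transitive dependency concrete, a hypothetical `employees` table that violates 3NF - the non-key column `department` determines the non-key column `department_location`:

| employee_id | name | department | department_location |
|-------------|------|------------|----------------------|
| 1           | Anna | Finance    | Kraków               |
| 2           | Tom  | Finance    | Kraków               |

Changing a department's location forces updates in every matching employee row; moving `department_location` into a separate `departments` table removes the transitive dependency.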
================================================
FILE: patterns/architecture.md
================================================

[go back](https://github.com/pkardas/learning)

# Architecture Patterns

- [Command and Query Responsibility Segregation (CQRS)](#command-and-query-responsibility-segregation-cqrs)
- [Reporting Database](#reporting-database)
- [Event Sourcing](#event-sourcing)
- [Saga](#saga)

## Command and Query Responsibility Segregation (CQRS)

Based on: https://docs.microsoft.com/en-us/azure/architecture/patterns/cqrs, https://martinfowler.com/bliki/CQRS.html, https://bulldogjob.pl/articles/122-cqrs-i-event-sourcing-czyli-latwa-droga-do-skalowalnosci-naszych-systemow_

This pattern separates read and update operations for a data store. Traditionally, the same data model is used to query and update a database. This works well for simple CRUD applications, but for more complex applications, with more advanced operations on the read and write sides, CQRS might be a better idea.

Commands update data, queries read data. Commands should be *task based*, rather than *data centric* (book a hotel room instead of setting `reservation_status` to `reserved`). Queries *never* modify the database. Usually, whenever a command updates data, it also publishes an event - and this needs to be done within a single transaction.

![patterns-architecture-cqrs-martin-fowler](../_images/patterns-architecture-cqrs-martin-fowler.png)

CQRS:

- you are able to scale Command and Query independently
- separate models for updating and querying might lead to eventual consistency
- suited for complex domains

## Reporting Database

Based on: https://martinfowler.com/bliki/ReportingDatabase.html

Set up a second database for reporting purposes; this database is completely separate from the operational (application) database.

Reporting Database:

- designed specifically for reports
- can be denormalized, usually read-only
- redundant information might speed up queries
- queries on the database don't add to the load on the operational database
- additional data might be derived from the operational database
- needs to be synced somehow with the main database (e.g. sync data overnight or sync using events)

## Event Sourcing

Based on: https://docs.microsoft.com/en-us/azure/architecture/patterns/event-sourcing, https://microservices.io/patterns/data/event-sourcing.html

> How to reliably/atomically update the database and publish messages/events?

Instead of maintaining the current state, the application can keep a log of state changes. Whenever the state of a business entity changes, a new event is appended to the list of events. Since saving an event is a single operation, it is inherently atomic. The application reconstructs an entity's current state by replaying the events.

The event log also behaves like a message broker. When a service saves an event in the event store, it is delivered to all interested subscribers.

> Event sourcing is commonly combined with the CQRS pattern by performing the data management tasks in response to the
> events, and by materialising views from the stored events.

In order to maintain consistency in multi-threaded applications, adding a timestamp to every event might help in resolving issues, but not in all cases. A better approach is to label each event with an incremental identifier. If two actions attempt to add events for the same entity at the same time, the event store can reject the event whose identifier matches one already stored for that entity.
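A minimal in-memory sketch of that idea (hypothetical, not from the sources above): each append carries the version the writer expects, and the store rejects stale writers:

```python
class ConcurrencyError(Exception):
    pass

class EventStore:
    def __init__(self) -> None:
        self.streams: dict[str, list[tuple[int, str]]] = {}

    def append(self, entity_id: str, event: str, expected_version: int) -> None:
        stream = self.streams.setdefault(entity_id, [])
        if len(stream) != expected_version:
            # Another writer appended first - reject instead of silently overwriting.
            raise ConcurrencyError(f"expected version {expected_version}, got {len(stream)}")
        stream.append((expected_version + 1, event))

store = EventStore()
store.append("order-1", "OrderCreated", expected_version=0)
store.append("order-1", "OrderPaid", expected_version=1)
try:
    store.append("order-1", "OrderShipped", expected_version=1)  # stale writer
except ConcurrencyError as error:
    print(error)  # expected version 1, got 2
```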
![patterns-architecture-event-sourcing-overview-microsoft](../_images/patterns-architecture-event-sourcing-overview.png)

This pattern is useful when:

- you want to capture intent, purpose, or reason in the data
- you want to record events that occur, and be able to replay them to restore the state of a system, roll back changes, or keep a history and audit log

Not useful when:

- the problem is small
- consistency and real-time updates to the views of the data are required
- history, and capabilities to roll back and replay actions, are not required

Example: a banking system - a list of all transactions; your total balance is calculated based on these transactions.

## Saga

Based on: https://microservices.io/patterns/data/saga.html

In a design where each service has its own database, sometimes transactions have to span multiple services, hence a local ACID transaction is not an option. A solution to this problem is the *Saga* - a sequence of local transactions. Each local transaction updates the database and publishes a message or event to trigger the next local transaction in the saga. If a local transaction fails because it violates a business rule, then the saga executes a series of compensating transactions that undo the changes made by the preceding local transactions.

For example: Service A creates a new Order with PENDING state and publishes an event that is consumed by another service B; service B responds with an event to service A. Service A accepts or rejects the new Order.

DON'T: Based on `Chapter 17: Microservices Architecture` @ `Fundamentals of Software Architecture`:

> Don't do transactions in microservices - fix granularity instead.

================================================
FILE: teaching/python-intermediate/README.md
================================================

[go back](https://github.com/pkardas/learning)

# Python Intermediate

Repository with the code and tasks: https://github.com/pkardas/shapes

================================================
FILE: teaching/python-intro/README.md
================================================

[go back](https://github.com/pkardas/learning)

# Introduction to Programming: Python for beginners

This folder contains the presentation and the notebook used during "Introduction to Programming: Python for beginners" classes. The training was intended for people with no prior programming skills. Each training was scheduled for 2 hours.

`presentation` - meeting agenda, topics, theory, examples

`notebook` - Jupyter Notebook with assignments; the audience was supposed to fill in the gaps using the provided theory and examples.
================================================ FILE: teaching/python-intro/notebook.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Introduction to programming: Python for beginners.ipynb", "provenance": [], "collapsed_sections": [ "rDyFlkw1DnX_", "lLgIUzF_PwR7", "jxT57KJoPSs3", "2eRI479WVjka", "4i61CcItIwUv", "jvxBKZp8nRZP", "6wz3rtllMw6k", "JOGgKUXDx356" ] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "rDyFlkw1DnX_" }, "source": [ "# Task 1 \n" ] }, { "cell_type": "code", "metadata": { "id": "6oNEjs4RbFo8" }, "source": [ "def airhelp() -> str:\n", " return \"AirHelp\"\n", "\n", "airhelp()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "-2d4bfTpaaUB" }, "source": [ "from datetime import date, timedelta\n", "\n", "def yesterday() -> date:\n", " return date.today() - timedelta(days=1)\n", "\n", "yesterday()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "glbGi1MgD0RH" }, "source": [ "# Write a function with a name `hello_world` that **returns**: \"Hello world!\". Fill the gaps with Python code. \n", "\n", "def AAA() -> str:\n", " return BBB\n", "\n", "hello_world()" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "lLgIUzF_PwR7" }, "source": [ "# Task 2: \n" ] }, { "cell_type": "code", "metadata": { "id": "mCBTZQCZbaMD" }, "source": [ "def y(x: int) -> int:\n", " return 2 * x\n", "\n", "y(10)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "e1GeGmhqbjIw" }, "source": [ "from typing import List\n", "\n", "def odds(numbers: List[int]) -> List[int]:\n", " return [number for number in numbers if number % 2 != 0]\n", "\n", "odds([1, 2, 3, 4, 5, 6, 7, 8, 9])" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "g8oV4dh_PwR8" }, "source": [ "# Write a function that greets the user, user's name is provided via a parameter. Return string with injected user name.\n", "\n", "def CCC(DDD: str) -> str:\n", " return f\"Hello, {DDD} 👋\"\n", "\n", "print(hello(\"Kamil\")) # Hello, Kamil! 👋\n", "print(hello(\"Piotr\")) # Hello, Piotr! 👋\n", "print(hello(\"Marta\")) # Hello, Marta! 👋" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "jxT57KJoPSs3" }, "source": [ "# Task 3: \n" ] }, { "cell_type": "code", "metadata": { "id": "lLU8BZZSdRZa" }, "source": [ "class Person:\n", " def __init__(self, name: str, surname: str, age: int) -> None:\n", " self.name = name\n", " self.surname = surname\n", " self.age = age\n", "\n", "p0 = Person(\"Anja\", \"Rubik\", 37)\n", "p1 = Person(\"Elon\", \"Musk\", 49)\n", "\n", "def introduce_person(person: Person) -> str:\n", " return f\"{person.name} is {person.age} years old.\"\n", "\n", "print(introduce_person(p0))\n", "print(introduce_person(p1))" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "9kZa8oAbPSs4" }, "source": [ "# Build a new data type - Message. Message should have 3 attributes: `content`, `sender_email` and `received_at`. Fill the gaps.\n", "\n", "from datetime import datetime\n", "\n", "class Message:\n", " def EEE(self, FFF: str, GGG: str, HHH: datetime) -> None:\n", " self.FFF = FFF\n", " self.GGG = GGG\n", " self.HHH = HHH\n", "\n", "m0 = Message(\"Hello! 
How are you?\", \"adam@gmail.com\", datetime(2021, 4, 21, 12, 0, 0))\n", "m1 = Message(\"I am fine!\", \"dan@gmail.com\", datetime.utcnow())\n", "\n", "print(m0.content, m0.sender_email, m0.received_at)\n", "print(m1.content, m1.sender_email, m1.received_at)" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "2eRI479WVjka" }, "source": [ "# Task 4: \n" ] }, { "cell_type": "code", "metadata": { "id": "1u5ekl3OfHoJ" }, "source": [ "class Person:\n", " def __init__(self, name: str, surname: str, age: int) -> None:\n", " self.name = name\n", " self.surname = surname\n", " self.age = age\n", "\n", " def introduce(self) -> str:\n", " return f\"{self.name} is {self.age} years old.\"\n", "\n", "p0 = Person(\"Anja\", \"Rubik\", 37)\n", "p1 = Person(\"Elon\", \"Musk\", 49)\n", "\n", "print(p0.introduce())\n", "print(p1.introduce())" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "8RzdyrbNe2yx" }, "source": [ "# Extend `Message`. Add a method that will return message language." ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "gzrXvaK_WmxA" }, "source": [ "! pip install langdetect" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "DpnviQcAVjkc" }, "source": [ "from langdetect import detect\n", "\n", "class Message:\n", " def __init__(self, content: str, sender_email: str, received_at: datetime) -> None:\n", " self.content = content\n", " self.sender_email = sender_email\n", " self.received_at = received_at\n", " \n", " @property\n", " def language(self) -> str:\n", " return detect(self.JJJ).upper()\n", "\n", "m0 = Message(\"Hi Johny.\", \"adam@gmail.com\", datetime(2021, 4, 21, 12, 0, 0))\n", "m1 = Message(\"こんにちは、Akikoさん。\", \"dan@gmail.com\", datetime(2021, 4, 21, 13, 0, 0))\n", "\n", "print(f\"'{m0.content}' is in {m0.language}\") # This should print: \"'Hi Johny.' 
is in EN\"\n", "print(f\"'{m1.content}' is in {m1.language}\") # This should print: \"'こんにちは、Akikoさん。' is in JA\"" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "4i61CcItIwUv" }, "source": [ "# Task 5: " ] }, { "cell_type": "code", "metadata": { "id": "UmHhUjSafdSV" }, "source": [ "def print_people(people: List[Person]) -> None:\n", " for i, person in enumerate(people):\n", " print(i, person.name, person.surname)\n", "\n", "p0 = Person(\"Anja\", \"Rubik\", 37)\n", "p1 = Person(\"Elon\", \"Musk\", 49)\n", "p2 = Person(\"Abel\", \"Tesfaye\", 31)\n", "p3 = Person(\"Guido\", \"van Rossum\", 65)\n", "\n", "\n", "people = [p0, p1, p2, p3]\n", "\n", "print_people(people)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "AW4cx5OugafV" }, "source": [ "people[2].surname" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "JYGq-YtQIwUx" }, "source": [ "from typing import List\n", "\n", "m0 = Message(\"Today is a beautiful day\", \"tom@gmail.com\", datetime(2020, 1, 1))\n", "m1 = Message(\"Today is rather average day\", \"adam@gmail.com\", datetime(2005, 12, 5))\n", "m2 = Message(\"Dziś jest piękny dzień\", \"ewa@gmail.com\", datetime(2021, 4, 21))\n", "m3 = Message(\"Aujourd'hui est une belle journée\", \"tina@gmail.com\", datetime(2020, 12, 5))\n", "\n", "def print_messages(messages: List[Message]) -> None:\n", " for i, message in enumerate(messages):\n", " print(i, message.content)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "zfSSyrxNJ5eB" }, "source": [ "# Group messages `m0, m1, m2, m3` together\n", "messages = KKK\n", "\n", "print_messages(messages)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "k2zBcriFlaXi" }, "source": [ "# Access first message from the list\n", "messages[LLL].content" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "DfB4WNlAlr0O" }, "source": [ "# Access the last message from the list\n", "messages[MMM].content" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "k2X690Xql1B3" }, "source": [ "# Assign the last message to the variable and display message language\n", "last_message = messages[NNN]\n", "last_message.language" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "D5K5KejanLa6" }, "source": [ "# Display the language of the last message without assigning to the variable\n", "messages[NNN].language" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Id6lwNVtmHYT" }, "source": [ "# Append message m4 to the existing list of the messages\n", "m4 = Message(\"Can you append me to the list, please?\", \"karen@gmail.com\", datetime(2021, 1, 5))\n", "messages.OOO(m4)\n", "\n", "print_messages(messages)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Ebap0NT4ngOt" }, "source": [ "# ITERATE over the list of messages and print: message content, sender and message language.\n", "for PPP in QQQ:\n", " print(PPP.content, PPP.sender_email, PPP.language)" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "jvxBKZp8nRZP" }, "source": [ "# Task 6: \n" ] }, { "cell_type": "code", "metadata": { "id": "Xedg0en_aPbd" }, "source": [ "people_over_40 = [person for person in people if person.age > 40]\n", "\n", "print_people(people_over_40)" ], 
"execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "cl7oWpTqrtbA" }, "source": [ "# Write a function returning filtered messages. Filter by message language.\n", "\n", "def messages_in_language(messages: List[Message], country_code: str) -> List[Message]:\n", " return [RRR for RRR in SSS if RRR.language == country_code]\n", "\n", "messages = [\n", " Message(\"This message is in English\", \"xyz@gmail.com\", datetime.now()),\n", " Message(\"This message is also in English\", \"xyz@gmail.com\", datetime.now()),\n", " Message(\"Ta wiadomość jest po polsku\", \"xyz@gmail.com\", datetime.now()),\n", " Message(\"Ta wiadomość również jest po polsku\", \"xyz@gmail.com\", datetime.now()),\n", " Message(\"このメッセージは日本語で書かれています。\", \"xyz@gmail.com\", datetime.now()),\n", " Message(\"このメッセージは日本語でも書かれています\", \"xyz@gmail.com\", datetime.now()),\n", "]\n", "\n", "print(\"-- PL --\")\n", "print_messages(messages_in_language(messages, \"PL\"))\n", "print(\"-- EN --\")\n", "print_messages(messages_in_language(messages, \"EN\"))\n", "print(\"-- JA --\")\n", "print_messages(messages_in_language(messages, \"JA\"))" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "6wz3rtllMw6k" }, "source": [ "# Task 7: \n" ] }, { "cell_type": "code", "metadata": { "id": "gxNBaSLpvFVY" }, "source": [ "[1, 1, 1, 1, 1, 2]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Oii5vHEFvLPi" }, "source": [ "(1, 1, 1, 1, 1, 2)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "vXB9LKlmvQAE" }, "source": [ "{1, 1, 1, 1, 1, 2}" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "H4Gss1dPMw6m" }, "source": [ "# Write a function returning unique e-mails from the provided list of messages.\n", "\n", "def unique_emails(messages: List[Message]) -> List[str]:\n", " return list({message.TTT for message in messages})\n", "\n", "messages = [\n", " Message(\"Lorem ipsum\", \"anna@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"dan@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"tom@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"kate@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"tom@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"kate@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"anna@gmail.com\", datetime.now()),\n", " Message(\"Lorem ipsum\", \"kate@gmail.com\", datetime.now()),\n", "]\n", "\n", "# This should print, somthing like:\n", "# ['tom@gmail.com', 'anna@gmail.com', 'dan@gmail.com', 'kate@gmail.com']\n", "# (order might be different)\n", "unique_emails(messages)" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "JOGgKUXDx356" }, "source": [ "# Task 8: \n" ] }, { "cell_type": "code", "metadata": { "id": "RbDPROw9avi9" }, "source": [ "sorted_people = sorted(people, key=lambda person: person.age)\n", "\n", "print_people(sorted_people)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "NO-du1hNx35-" }, "source": [ "# Write a function returning messages sorted by date.\n", "\n", "def messages_sorted_by_date(messages: List[Message]) -> List[Message]:\n", " return sorted(VVV, key=lambda message: message.UUU)\n", "\n", "messages = [\n", " Message(\"1\", \"example@gmail.com\", datetime(2005, 1, 1)),\n", " Message(\"3\", \"example@gmail.com\", datetime(2006, 6, 2)),\n", " Message(\"6\", 
\"example@gmail.com\", datetime(2020, 6, 6)),\n", " Message(\"4\", \"example@gmail.com\", datetime(2007, 4, 1)),\n", " Message(\"8\", \"example@gmail.com\", datetime(2021, 5, 5)),\n", " Message(\"2\", \"example@gmail.com\", datetime(2005, 2, 6)),\n", " Message(\"7\", \"example@gmail.com\", datetime(2020, 9, 9)),\n", " Message(\"5\", \"example@gmail.com\", datetime(2010, 9, 1)),\n", "]\n", "\n", "# This should print something like: \"1, 2, 3, 4, 5, 6, 7, 8\"\n", "print_messages(messages_sorted_by_date(messages))" ], "execution_count": null, "outputs": [] } ] }