Repository: jennybc/code-smells-and-feels
Branch: master
Commit: 25e1a11272d7
Files: 22
Total size: 25.7 KB

Directory structure:
gitextract_l_5jxohu/
├── .gitignore
├── R/
│   ├── 00_biz_comment-decomment.R
│   ├── 01_biz_if-else.R
│   ├── 02_biz_function.R
│   ├── 03_biz_complicated-conditions.R
│   ├── 04_biz_simplify-conditions.R
│   ├── 05_googledrive_is-parental.R
│   ├── 06_biz_stopifnot.R
│   ├── 07_get-some-data_before.R
│   ├── 08_get-some-data_after.R
│   ├── 09_googledrive_process-response.R
│   ├── 10_biz_if-else-apalooza.R
│   ├── 11_biz_S3.R
│   ├── 12_biz_switch.R
│   ├── 13_stringr_switch.R
│   ├── 14_age_case-when.R
│   └── 15_devtools_%||%.R
├── README.md
├── code-smells-and-feels.Rproj
├── resources/
│   └── code-smells.txt
└── stackoverflow-survey/
    ├── README.Rmd
    └── README.md

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.Rhistory
.RData
.Rproj.user
developer_survey_2018
Refactoring_improving_the_design_of_existing_code.pdf
keynote*

================================================
FILE: R/00_biz_comment-decomment.R
================================================
## Deliberate smell: behaviour is switched by hand-editing comments. To handle
## the logical `x`, the reader has to uncomment the second definition of `x`
## and swap `-x` for the commented-out `!x` below.
x <- 1:5
#x <- c(TRUE, FALSE, FALSE, TRUE, FALSE)

cat(
  "The bizarro version of x is",
  -x,
  #!x,
  "\n"
)

================================================
FILE: R/01_biz_if-else.R
================================================
## The type decision now lives in code instead of in comments: numeric input
## is negated with `-`, anything else with `!`.
x <- 1:5
#x <- c(TRUE, FALSE, FALSE, TRUE, FALSE)

cat(
  "The bizarro version of x is",
  if (is.numeric(x)) {
    -x
  } else {
    !x
  },
  "\n"
)

================================================
FILE: R/02_biz_function.R
================================================
## Same logic wrapped in a function.
## Returns `-x` when is.numeric(x) is TRUE and `!x` otherwise.
bizarro <- function(x) {
  if (is.numeric(x)) {
    -x
  } else {
    !x
  }
}

bizarro(1:5)
bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))

================================================
FILE: R/03_biz_complicated-conditions.R
================================================
## Adds an explicit error for unsupported input, but decides the type by
## comparing the first class string by hand. The verbose conditions are kept
## on purpose; 04_biz_simplify-conditions.R replaces them with is.*() tests.
bizarro <- function(x) {
  if (class(x)[[1]] == "numeric" || class(x)[[1]] == "integer") {
    -x
  } else if (class(x)[[1]] == "logical") {
    !x
  } else {
    stop(
      "Don't know how to make bizarro <", class(x)[[1]], ">",
      call. = FALSE)
  }
}

bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))
bizarro(1:5)
## character input reaches the final branch, so this call signals the error
bizarro(c("abc", "def"))

================================================
FILE: R/04_biz_simplify-conditions.R
================================================
## Same three-way branch as 03, with is.numeric() / is.logical() replacing the
## manual class-string comparisons.
bizarro <- function(x) {
  if (is.numeric(x)) {
    -x
  } else if (is.logical(x)) {
    !x
  } else {
    stop(
      "Don't know how to make bizarro <", class(x)[[1]], ">",
      call. = FALSE)
  }
}

bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))
bizarro(1:5)
## character input reaches the final branch, so this call signals the error
bizarro(c("abc", "def"))

================================================
FILE: R/05_googledrive_is-parental.R
================================================
## Excerpt: a well-named predicate keeps the condition in the caller readable.
## The bare `...` at the end of the body presumably stands for the rest of the
## upstream function, which is not reproduced here.
## https://github.com/tidyverse/googledrive/blob/b6e97999d0781c31c69ef6fa93d1091a04b44d20/R/drive_cp.R#L67
drive_cp <- function(file, ...) {
  file <- as_dribble(file)
  file <- confirm_single_file(file)
  if (is_parental(file)) {
    stop_glue("The Drive API does not copy folders or Team Drives.")
  }
  ...
}

## Returns a logical vector with one element per entry of `d$drive_resource`:
## TRUE when the entry's kind is "drive#teamDrive" or its mimeType is the
## folder type. `d` must inherit from "dribble".
## https://github.com/tidyverse/googledrive/blob/b6e97999d0781c31c69ef6fa93d1091a04b44d20/R/dribble.R#L253
is_parental <- function(d) {
  stopifnot(inherits(d, "dribble"))
  kind <- purrr::map_chr(d$drive_resource, "kind")
  ## entries without a "mimeType" yield NA, so the result is NA for them
  ## unless `kind` already matches
  mime_type <- purrr::map_chr(d$drive_resource, "mimeType", .default = NA)
  kind == "drive#teamDrive" | mime_type == "application/vnd.google-apps.folder"
}

================================================
FILE: R/06_biz_stopifnot.R
================================================
## The unsupported-input case is rejected up front with stopifnot(), so the
## body only has to tell numeric from logical.
bizarro <- function(x) {
  stopifnot(is.numeric(x) || is.logical(x))

  if (is.numeric(x)) {
    -x
  } else {
    !x
  }
}

bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))
bizarro(1:5)
## character input fails the stopifnot() check
bizarro(c("abc", "def"))

================================================
FILE: R/07_get-some-data_before.R
================================================
## "Before" half of the early-return example; the nesting is the point and is
## left as is. Returns TRUE after writing, FALSE when the data does not make
## sense, and errors when the network is unavailable. The two trailing `else`
## branches are empty, so a bad config or an unwritable outfile makes the
## function return NULL without signalling anything.
## The helpers (config_ok(), can_write(), ...) are not defined in this file.
get_some_data <- function(config, outfile) {
  if (config_ok(config)) {
    if (can_write(outfile)) {
      if (can_open_network_connection(config)) {
        data <- parse_something_from_network()
        if (makes_sense(data)) {
          data <- beautify(data)
          write_it(data, outfile)
          return(TRUE)
        } else {
          return(FALSE)
        }
      } else {
        stop("Can't access network")
      }
    } else {
      ## uhm. What was this else for again?
    }
  } else {
    ## maybe, some bad news about ... the config?
  }
}

================================================
FILE: R/08_get-some-data_after.R
================================================
## "After" half of the early-return example: each precondition is checked and
## rejected at the top, leaving the main path unindented. Errors on bad config,
## unwritable outfile or missing network; returns FALSE when the data does not
## make sense and TRUE after writing it.
## The helpers (config_bad(), can_write(), ...) are not defined in this file.
get_some_data <- function(config, outfile) {
  if (config_bad(config)) {
    stop("Bad config")
  }

  if (!can_write(outfile)) {
    stop("Can't write outfile")
  }

  if (!can_open_network_connection(config)) {
    stop("Can't access network")
  }

  data <- parse_something_from_network()
  if (!makes_sense(data)) {
    return(FALSE)
  }

  data <- beautify(data)
  write_it(data, outfile)
  TRUE
}

================================================
FILE: R/09_googledrive_process-response.R
================================================
## Excerpt showing early returns for the success cases: status 204 returns
## TRUE, any other 2xx status returns the parsed JSON content. The error
## handling that follows upstream is elided (see the closing comment).
## https://github.com/tidyverse/googledrive/blob/94c8c01cbb9fd96fe59920cf58e39263a3135337/R/process_response.R#L8
process_response <- function(res) {
  if (httr::status_code(res) == 204) {
    return(TRUE)
  }

  if (httr::status_code(res) >= 200 && httr::status_code(res) < 300) {
    return(res %>%
             stop_for_content_type() %>%
             httr::content(as = "parsed", type = "application/json"))
  }

  ## 20+ more lines of error handling ...
}

================================================
FILE: R/10_biz_if-else-apalooza.R
================================================
## Reverses the characters of every element of a character vector.
## Always returns a character vector (enforced by FUN.VALUE = "").
str_reverse <- function(x) {
  vapply(
    strsplit(x, ""),
    FUN = function(z) paste(rev(z), collapse = ""),
    FUN.VALUE = "")
}

str_reverse(c("abc", "def"))

## The if/else chain grows one branch per supported type: numeric, logical,
## character and factor (factor levels are reversed). Anything else errors.
bizarro <- function(x) {
  if (is.numeric(x)) {
    -x
  } else if (is.logical(x)) {
    !x
  } else if (is.character(x)) {
    str_reverse(x)
  } else if (is.factor(x)) {
    levels(x) <- rev(levels(x))
    x
  } else {
    stop(
      "Don't know how to make bizarro <", class(x)[[1]], ">",
      call. = FALSE)
  }
}

bizarro(1:5)
bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))
bizarro(c("abc", "def"))

(m <- factor(month.abb, levels = month.abb))
bizarro(factor(m))

## a data frame matches none of the branches, so this call signals the error
bizarro(iris)

================================================
FILE: R/11_biz_S3.R
================================================
## snippets on slides created with lots of selective reprex()ing and toggling of
## the chunk options

#+ include = FALSE
## Reverses the characters of every element of a character vector.
str_reverse <- function(x) {
  vapply(
    strsplit(x, ""),
    FUN = function(z) paste(rev(z), collapse = ""),
    FUN.VALUE = "")
}

#+ include = FALSE
## S3 generic: the per-type logic lives in the methods defined below.
bizarro <- function(x) {
  UseMethod("bizarro")
}

## Fallback for classes without a method of their own.
bizarro.default <- function(x) {
  stop(
    "Don't know how to make bizarro <", class(x)[[1]], ">",
    call. = FALSE
  )
}

#+ include = FALSE, eval = FALSE
bizarro(1:5)
bizarro(TRUE)
bizarro("abc")

#+ include = FALSE
bizarro.numeric <- function(x) -x

bizarro.logical <- function(x) !x

bizarro.character <- function(x) str_reverse(x)

## Reverses the order of the factor levels.
bizarro.factor <- function(x) {
  levels(x) <- rev(levels(x))
  x
}

## Reverses the column names (they are character, so the character method
## applies) and then applies bizarro() to every column. `x[] <-` keeps `x` a
## data frame.
bizarro.data.frame <- function(x) {
  names(x) <- bizarro(names(x))
  x[] <- lapply(x, bizarro)
  x
}

#+ include = TRUE
bizarro(1:5)
bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))
bizarro(c("abc", "def"))

(m <- factor(month.abb[1:3], levels = month.abb[1:3]))
bizarro(m)

bizarro(head(iris, 3))

================================================
FILE: R/12_biz_switch.R
================================================
#+ include = FALSE
## Reverses the characters of every element of a character vector.
str_reverse <- function(x) {
  vapply(
    strsplit(x, ""),
    FUN = function(z) paste(rev(z), collapse = ""),
    FUN.VALUE = "")
}

#+ include = TRUE
## switch() version: dispatches on the first class string of `x`.
bizarro <- function(x) {
  cls <- class(x)[[1]] ## not a great idea, in general
  switch(
    cls,
    logical = !x,
    ## `integer = ,` has no value of its own, so switch() falls through to
    ## the next supplied value (numeric)
    integer = ,
    numeric = -x,
    character = str_reverse(x),
    ## unnamed last argument: the default when no name matches
    stop("Don't know how to make bizarro <", cls, ">", call. = FALSE)
  )
}

bizarro(1:5)
bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))
bizarro(c("abc", "def"))
## "data.frame" matches no name, so this call signals the error
bizarro(iris)

================================================
FILE: R/13_stringr_switch.R
================================================
## Excerpt: match.arg() restricts `side` to one of the three listed choices
## (defaulting to the first), and switch() then picks the matching padder.
## https://github.com/tidyverse/stringr/blob/e775974d2822ae0de90ab33cf9f02273c22a3801/R/pad.r#L30
str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ") {
  side <- match.arg(side)

  switch(
    side,
    left = stri_pad_left(string, width, pad = pad),
    right = stri_pad_right(string, width, pad = pad),
    both = stri_pad_both(string, width, pad = pad)
  )
}

================================================
FILE: R/14_age_case-when.R
================================================
library(tidyverse)

## case_when(): one condition per line; the first condition that holds wins
## and `TRUE ~` is the catch-all.
tibble(
  age_yrs = c(0, 4, 10, 15, 24, 55),
  age_cat = case_when(
    age_yrs < 2 ~ "baby",
    age_yrs < 13 ~ "kid",
    age_yrs < 20 ~ "teen",
    TRUE ~ "adult"
  )
)

## The same cut-offs written as nested ifelse() calls, for comparison.
age_yrs <- c(0, 8, 15, 24, 55)
cat(
  ifelse(age_yrs < 2, "baby",
    ifelse(age_yrs < 13, "kid",
      ifelse(age_yrs < 20, "teen",
        "adult"
      )
    )
  ),
  sep = "\n"
)

================================================
FILE: R/15_devtools_%||%.R
================================================
## https://github.com/r-lib/devtools/blob/b01edfbfa1fd0e3965a24188805c5e55f0d7376f/R/build-manual.R
#' Create package pdf manual
#'
#' @param pkg package path or name
#' @param path path in which to produce package manual
build_manual <- function(pkg = ".", path = NULL) {
  pkg <- as.package(pkg)
  path <- path %||% dirname(pkg$path)
  ...
}

## https://github.com/r-lib/devtools/blob/bd3bdf15b8f2e5e07d750de4360df28090a9f117/R/install-github.r#L73-L74
github_remote <- function(repo, username = NULL, ...) {
  meta <- parse_git_repo(repo)
  ...
  ## arguments are evaluated lazily, so stop() only runs when both `username`
  ## and the "github.user" option are NULL
  meta$username <- username %||% getOption("github.user") %||% stop("Unknown username.")
  ...
}

## The explicit form that `path %||% dirname(pkg$path)` replaces.
if (is.null(path)) {
  path <- dirname(pkg$path)
}

## Null-default operator: returns `y` when `x` is NULL, otherwise `x`.
## Uses base is.null() so the definition works without attaching any package.
`%||%` <- function(x, y) {
  if (is.null(x)) y else x
}

## Pattern sketch, explicit NULL check ...
f <- function(x, y = TRUE, z = NULL) {
  ...
  if (is.null(z)) {
    z <- much_logic(other, stuff, ...)
  }
  ...
}

## ... and the same default written with %||%.
f <- function(x, y = TRUE, z = NULL) {
  ...
  z <- z %||% much_logic(other, stuff, ...)
  ...
}

================================================
FILE: README.md
================================================
# Code Smells and Feels

Talk initially prepared for [useR!2018](https://user2018.r-project.org) Brisbane. Also delivered elsewhere, such as the [First Mexican Statistical Association School in Data Science](https://amestad.mx/escuela/1/)

by Jenny Bryan
[jennybryan.org](https://jennybryan.org)
Twitter: [@jennyBryan](https://twitter.com/jennyBryan/)
GitHub: [@jennybc](https://github.com/jennybc)

> "Code smell" is an evocative term for that vague feeling of unease we get when reading certain bits of code. It's not necessarily wrong, but neither is it obviously correct. We may be reluctant to work on such code, because past experience suggests it's going to be fiddly and bug-prone. In contrast, there's another type of code that just feels good to read and work on. What's the difference? If we can be more precise about code smells and feels, we can be intentional about writing code that is easier and more pleasant to work on. I've been fortunate to spend the last couple years embedded in a group of developers working on the tidyverse and r-lib packages. Based on this experience, I'll talk about specific code smells and deodorizing strategies for R.

## Link to this repo

[rstd.io/code-smells](https://rstd.io/code-smells) is a shortlink to HERE

## Slides

Slides [on SpeakerDeck](https://speakerdeck.com/jennybc/code-smells-and-feels)

Slides [as PDF file](2018-07_user-brisbane-bryan.pdf) here in this repo

## Video

Video is available on YouTube:

## Credits and resources

Annotated and hyperlink-y list of resources mentioned in the slides, in roughly the same order.

---

Do useRs have less formal training in CS/programming than others writing code?
2018 Stack Overflow Annual Developer Survey: Adapted from original code by [Julia Silge](https://juliasilge.com), data scientist at Stack Overflow. Code here in this repo: [stackoverflow-survey](stackoverflow-survey) --- Talks about programming style, workflow, and policies Some that inspired me: Good Programming Practice, UseR! 2004 Keynote, Martin Mächler http://www.ci.tuwien.ac.at/Conferences/useR-2004/ What I find important when R Programming and Recent Cool Features in R 2018 eRum Keynote by Martin Mächler and R Core Team http://stat.ethz.ch/~maechler/U/R/eRum_2018_ProgR-ALTREP.html My own efforts in this genre: [Zen And The aRt Of Workflow Maintenance](https://speakerdeck.com/jennybc/zen-and-the-art-of-workflow-maintenance), IASC/NZSA 2017, Jenny Bryan [Workflow: You should have one](https://speakerdeck.com/jennybc/workflow-you-should-have-one), EARL London 2017, Jenny Bryan --- Cakes that look like hedgehogs ... sort of? * Beautiful hedgehog cake: [BBC goodfood recipe](https://www.bbcgoodfood.com/recipes/hedgehog-cake) * Homely hedgehog cake: [Reddit thread](https://www.reddit.com/r/funny/comments/1am3x7/so_a_friend_of_my_girlfriend_made_a_cake_for_her/), * Photos originally found at --- [What Every Successful Person Knows, But Never Says](https://jamesclear.com/ira-glass-failure) James Clear blog post that discusses an Ira Glass interview. Indicative quote: > All of us who do creative work, we get into it because we have good taste. But it's like there is this gap. For the first couple years that you're making stuff, what you're making isn't so good. It’s not that great. It’s trying to be good, it has ambition to be good, but it’s not that good. > > But your taste, the thing that got you into the game, is still killer. And your taste is good enough that you can tell that what you're making is kind of a disappointment to you. 
--- The teams that bring you * tidyverse packages: [org members](https://github.com/orgs/tidyverse/people) and [outside collaborators](https://github.com/orgs/tidyverse/outside-collaborators) * r-lib packages: [org members](https://github.com/orgs/r-lib/people) and [outside collaborators](https://github.com/orgs/r-lib/outside-collaborators) --- Refactoring Improving the Design of Existing Code by Martin Fowler (with Kent Beck, John Brant, William Opdyke, and Don Roberts) https://martinfowler.com/books/refactoring.html Dumpster photo by NeONBRAND https://unsplash.com/photos/8Yk4T-tDSYY --- Code Smells – a Short List blog post by Arne Mertz https://arne-mertz.de/2017/08/code-smells-short-list/ --- bizarro: all code snippets are given here in [R/](R) Beach + glass orb photo by Perchek Industrie https://unsplash.com/photos/y-rmmZZfD1I --- Good enough practices in scientific computing Wilson G, Bryan J, Cranston K, Kitzes J, Nederbragt L, et al. (2017) Good enough practices in scientific computing. PLOS Computational Biology 13(6): e1005510. > Do not comment and uncomment sections of code to control a program's behavior. --- `if() else()` described in breathless AI style: you mean a one layer neural network with identity activation and no hidden layers [Tweet](https://twitter.com/F_Vaggi/status/1011127587639197696) by [Federico Vaggi](https://twitter.com/F_Vaggi) --- Return early and clearly Blog post by Arne Mertz https://arne-mertz.de/2016/12/early-return/ "Handling preconditions" section is the basis of my early return before/after example `get_some_data()`. 
More posts and conversations about early returns and avoiding if entirely: * [Avoid Else, Return Early](http://blog.timoxley.com/post/47041269194/avoid-else-return-early), blog post by Tim Oxley * [Anti-If: The missing patterns](https://code.joejag.com/2016/anti-if-the-missing-patterns.html), blog post by Joe Wright * Recent discussion on Hacker News: Yoda photo by Kory Westerhold on flickr https://www.flickr.com/photos/korymatthew/14211839966 --- Baby with diaper photo by rawpixel https://unsplash.com/photos/6RjllGKO88U --- In addition to the **Refactoring** book referenced above, these are other good reads for improving your code: The Art of Readable Code Simple and Practical Techniques for Writing Better Code Dustin Boswell, Trevor Foucher http://shop.oreilly.com/product/9780596802301.do The Pragmatic Programmer From Journeyman to Master by Andrew Hunt and David Thomas https://pragprog.com/book/tpp/the-pragmatic-programmer --- Upgrade your cargo cult for the win https://meaningness.com/metablog/upgrade-your-cargo-cult Toddler on run bike photo by Jordan Sanchez https://unsplash.com/photos/Vbzx-yy5FoA ================================================ FILE: code-smells-and-feels.Rproj ================================================ Version: 1.0 RestoreWorkspace: No SaveWorkspace: No AlwaysSaveHistory: Default EnableCodeIndexing: Yes UseSpacesForTab: Yes NumSpacesForTab: 2 Encoding: UTF-8 RnwWeave: Sweave LaTeX: pdfLaTeX AutoAppendNewline: Yes StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source PackageRoxygenize: rd,collate,namespace ================================================ FILE: resources/code-smells.txt ================================================ Duplicated Code Long Method Large Class Long Parameter List Divergent Change Shotgun Surgery Feature Envy Data Clumps Primitive Obsession Switch Statements Parallel Inheritance Hierarchies Lazy Class Speculative Generality Temporary 
Field Message Chains Middle Man Inappropriate Intimacy Alternative Classes with Different Interfaces Incomplete Library Class Data Class Refused Bequest Comments

================================================
FILE: stackoverflow-survey/README.Rmd
================================================
---
output: github_document
---

```{r setup, include = FALSE}
## Chunk defaults for the whole document: collapse source and output into one
## block and prefix output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
```

# Stack Overflow survey analysis

Look at evidence for formal training in CS/programming among those who use R.

Use 2018 Stack Overflow Annual Developer Survey:

Original code from [Julia Silge](https://juliasilge.com), data scientist at Stack Overflow. Modified by Jenny Bryan.

## Analysis

Load packages.

```{r}
library(here)
library(tidyverse)
library(scales)
```

Make sure we have the data.

```{r download-data}
## Expected location of the survey CSV, relative to the project root.
survey_path <- here(
  "stackoverflow-survey/developer_survey_2018/survey_results_public.csv"
)

## Download and unpack only when the CSV is not on disk yet.
if (!file.exists(survey_path)) {
  ## download_zip() is an unexported usethis helper, hence `:::`.
  ## Original note: consults Content-Description to get filename
  ## NOTE(review): probably the Content-Disposition header is meant -- confirm
  zip_file <- usethis:::download_zip(
    url = "https://drive.google.com/uc?export=download&id=1_9On2-nsBQIw3JiY43sWbrF8EjrqrR4U",
    destdir = here("stackoverflow-survey")
  )

  ## unpack into a sibling folder named after the archive, minus its extension
  unzip_dir <- here(
    "stackoverflow-survey",
    tools::file_path_sans_ext(basename(zip_file))
  )
  utils::unzip(zip_file, exdir = unzip_dir)

  ## keep the unpacked data out of version control, then drop the archive
  usethis::use_git_ignore(basename(unzip_dir))
  unlink(zip_file)
}
```

Load the data.

```{r survey2018}
theme_set(theme_minimal(base_family = "Source Sans Pro"))
survey2018 <- read_csv(survey_path)
```

What kinds of majors do R users have?
```{r wrangle}
## One row per respondent who reported a major: does their list of languages
## include "R", and what is their major?
users_majors <- survey2018 %>%
  select(Respondent, LanguageWorkedWith, UndergradMajor) %>%
  filter(!is.na(UndergradMajor)) %>%
  ## LanguageWorkedWith is a ";"-separated string; split it and give every
  ## language its own row
  mutate(LanguageWorkedWith = str_split(LanguageWorkedWith, pattern = ";")) %>%
  unnest(LanguageWorkedWith) %>%
  group_by(Respondent) %>%
  summarize(UsesR = "R" %in% LanguageWorkedWith,
            UndergradMajor = first(UndergradMajor))

## Respondent counts per major, in one column per group ("useR" / "Other").
counts_major <- users_majors %>%
  count(UsesR, UndergradMajor) %>%
  mutate(UsesR = if_else(UsesR, "useR", "Other")) %>%
  spread(UsesR, n, fill = 0)

## Turn each count column into add-one-smoothed proportions, then compare the
## two groups on a log2 scale (positive = relatively more common among useRs).
logratio_major <- counts_major %>%
  ## a plain function replaces the deprecated funs() wrapper; the columns are
  ## still overwritten in place
  mutate_if(is.numeric, function(n) (n + 1) / sum(n + 1)) %>%
  mutate(logratio = log2(useR / Other)) %>%
  arrange(desc(logratio)) %>%
  mutate(
    UndergradMajor = reorder(UndergradMajor, logratio),
    Direction = factor(if_else(logratio > 0, "useRs", "Other")),
    Direction = forcats::fct_reorder(Direction, logratio, .desc = TRUE)
  )
```

```{r reveal-data}
knitr::kable(counts_major)
sum(counts_major$Other)
sum(counts_major$useR)
knitr::kable(logratio_major)
```

```{r major-barchart, fig.height = 6, fig.width = 11, dpi = 300}
## Shared base plot: one horizontal bar per major. The axis is in log2 units,
## so the breaks -2..2 are labelled with the ratios they stand for.
p <- logratio_major %>%
  group_by(Direction) %>%
  ggplot(aes(UndergradMajor, logratio, fill = Direction)) +
  geom_col(alpha = 0.9) +
  coord_flip() +
  scale_y_continuous(breaks = seq(-2, 2),
                     labels = c("0.25x", "0.5x", "Same", "2x", "4x"))

## Julia's original
p +
  labs(y = "Relatively more from R users", x = NULL, fill = "More likely from...",
       subtitle = "R users are less likely to have formal programming training",
       title = "What kinds of undergrad majors do R users have?")

## For use in Keynote
p +
  labs(y = "Relative prevalence", x = NULL, fill = "Major is more common among",
       caption = "Julia Silge & Jenny Bryan\nSource: 2018 Stack Overflow Annual Developer Survey") +
  theme(
    legend.position = "top",
    legend.title = element_text(size = rel(1.4)),
    axis.text.y = element_text(size = rel(1.3))
  )
```

================================================
FILE: stackoverflow-survey/README.md
================================================
# Stack
Overflow survey analysis Look at evidence for formal training in CS/programming among those who use R. Use 2018 Stack Overflow Annual Developer Survey: Original code from [Julia Silge](https://juliasilge.com), data scientist at Stack Overflow. Modified by Jenny Bryan. ## Analysis Load packages. ``` r library(here) #> here() starts at /Users/jenny/talks/2018-07_user-brisbane library(tidyverse) #> ── Attaching packages ──────────────────────────────────────── tidyverse 1.2.1 ── #> ✔ ggplot2 2.2.1 ✔ purrr 0.2.5 #> ✔ tibble 1.4.2 ✔ dplyr 0.7.6 #> ✔ tidyr 0.8.1 ✔ stringr 1.3.1 #> ✔ readr 1.1.1 ✔ forcats 0.3.0 #> ── Conflicts ─────────────────────────────────────────── tidyverse_conflicts() ── #> ✖ dplyr::filter() masks stats::filter() #> ✖ dplyr::lag() masks stats::lag() library(scales) #> #> Attaching package: 'scales' #> The following object is masked from 'package:purrr': #> #> discard #> The following object is masked from 'package:readr': #> #> col_factor ``` Make sure we have the data. ``` r survey_path <- here( "stackoverflow-survey/developer_survey_2018/survey_results_public.csv" ) if (!file.exists(survey_path)) { ## consults Content-Description to get filename dl <- usethis:::download_zip( url = "https://drive.google.com/uc?export=download&id=1_9On2-nsBQIw3JiY43sWbrF8EjrqrR4U", destdir = here("stackoverflow-survey") ) target <- here( "stackoverflow-survey", tools::file_path_sans_ext(basename(dl)) ) utils::unzip(dl, exdir = target) usethis::use_git_ignore(basename(target)) unlink(dl) } ``` Load the data. 
``` r theme_set(theme_minimal(base_family="Source Sans Pro")) survey2018 <- read_csv(survey_path) #> Parsed with column specification: #> cols( #> .default = col_character(), #> Respondent = col_integer(), #> AssessJob1 = col_integer(), #> AssessJob2 = col_integer(), #> AssessJob3 = col_integer(), #> AssessJob4 = col_integer(), #> AssessJob5 = col_integer(), #> AssessJob6 = col_integer(), #> AssessJob7 = col_integer(), #> AssessJob8 = col_integer(), #> AssessJob9 = col_integer(), #> AssessJob10 = col_integer(), #> AssessBenefits1 = col_integer(), #> AssessBenefits2 = col_integer(), #> AssessBenefits3 = col_integer(), #> AssessBenefits4 = col_integer(), #> AssessBenefits5 = col_integer(), #> AssessBenefits6 = col_integer(), #> AssessBenefits7 = col_integer(), #> AssessBenefits8 = col_integer(), #> AssessBenefits9 = col_integer() #> # ... with 23 more columns #> ) #> See spec(...) for full column specifications. ``` What kinds of majors do R users have? ``` r users_majors <- survey2018 %>% select(Respondent, LanguageWorkedWith, UndergradMajor) %>% filter(!is.na(UndergradMajor)) %>% mutate(LanguageWorkedWith = str_split(LanguageWorkedWith, pattern = ";")) %>% unnest(LanguageWorkedWith) %>% group_by(Respondent) %>% summarize(UsesR = "R" %in% LanguageWorkedWith, UndergradMajor = first(UndergradMajor)) counts_major <- users_majors %>% count(UsesR, UndergradMajor) %>% mutate(UsesR = if_else(UsesR, "useR", "Other")) %>% spread(UsesR, n, fill = 0) logratio_major <- counts_major %>% mutate_if(is.numeric, funs((. + 1) / sum(. 
+ 1))) %>% mutate(logratio = log2(useR / Other)) %>% arrange(desc(logratio)) %>% mutate( UndergradMajor = reorder(UndergradMajor, logratio), Direction = factor(if_else(logratio > 0, "useRs", "Other")), Direction = forcats::fct_reorder(Direction, logratio, .desc = TRUE) ) ``` ``` r knitr::kable(counts_major) ``` | UndergradMajor | Other | useR | | :-------------------------------------------------------------------- | ----: | ---: | | A business discipline (ex. accounting, finance, marketing) | 1750 | 171 | | A health science (ex. nursing, pharmacy, radiology) | 217 | 29 | | A humanities discipline (ex. literature, history, philosophy) | 1487 | 103 | | A natural science (ex. biology, chemistry, physics) | 2561 | 489 | | A social science (ex. anthropology, psychology, political science) | 1122 | 255 | | Another engineering discipline (ex. civil, electrical, mechanical) | 6575 | 370 | | Computer science, computer engineering, or software engineering | 48340 | 1996 | | Fine arts or performing arts (ex. graphic design, music, studio art) | 1105 | 30 | | I never declared a major | 677 | 16 | | Information systems, information technology, or system administration | 6307 | 200 | | Mathematics or statistics | 2236 | 582 | | Web development or web design | 2397 | 21 | ``` r sum(counts_major$Other) #> [1] 74774 sum(counts_major$useR) #> [1] 4262 knitr::kable(logratio_major) ``` | UndergradMajor | Other | useR | logratio | Direction | | :-------------------------------------------------------------------- | --------: | --------: | ----------: | :-------- | | Mathematics or statistics | 0.0299120 | 0.1364062 | 2.1891119 | useRs | | A social science (ex. anthropology, psychology, political science) | 0.0150162 | 0.0598971 | 1.9959672 | useRs | | A natural science (ex. biology, chemistry, physics) | 0.0342577 | 0.1146467 | 1.7426926 | useRs | | A health science (ex. nursing, pharmacy, radiology) | 0.0029150 | 0.0070192 | 1.2678157 | useRs | | A business discipline (ex. 
accounting, finance, marketing) | 0.0234135 | 0.0402433 | 0.7814108 | useRs | | A humanities discipline (ex. literature, history, philosophy) | 0.0198968 | 0.0243332 | 0.2903903 | useRs | | Another engineering discipline (ex. civil, electrical, mechanical) | 0.0879309 | 0.0868039 | \-0.0186098 | Other | | Computer science, computer engineering, or software engineering | 0.6463910 | 0.4672438 | \-0.4682317 | Other | | Information systems, information technology, or system administration | 0.0843473 | 0.0470285 | \-0.8428058 | Other | | Fine arts or performing arts (ex. graphic design, music, studio art) | 0.0147889 | 0.0072532 | \-1.0278300 | Other | | I never declared a major | 0.0090659 | 0.0039775 | \-1.1885692 | Other | | Web development or web design | 0.0320648 | 0.0051474 | \-2.6390749 | Other | ``` r p <- logratio_major %>% group_by(Direction) %>% ggplot(aes(UndergradMajor, logratio, fill = Direction)) + geom_col(alpha = 0.9) + coord_flip() + scale_y_continuous(breaks = seq(-2, 2), labels = c("0.25x", "0.5x", "Same", "2x", "4x")) ## Julia's original p + labs(y = "Relatively more from R users", x = NULL, fill = "More likely from...", subtitle = "R users are less likely to have formal programming training", title = "What kinds of undergrad majors do R users have?") ``` ![](README_files/figure-gfm/major-barchart-1.png) ``` r ## For use in Keynote p + labs(y = "Relative prevalence", x = NULL, fill = "Major is more common among", caption = "Julia Silge & Jenny Bryan\nSource: 2018 Stack Overflow Annual Developer Survey") + theme( legend.position = "top", legend.title = element_text(size = rel(1.4)), axis.text.y = element_text(size = rel(1.3)) ) ``` ![](README_files/figure-gfm/major-barchart-2.png)