[
  {
    "path": ".gitignore",
    "content": ".Rhistory\n.RData\n.Rproj.user\ndeveloper_survey_2018\nRefactoring_improving_the_design_of_existing_code.pdf\nkeynote*\n"
  },
  {
    "path": "R/00_biz_comment-decomment.R",
    "content": "x <- 1:5\n#x <- c(TRUE, FALSE, FALSE, TRUE, FALSE)\n\ncat(\n  \"The bizarro version of x is\",\n  -x,\n  #!x,\n  \"\\n\"\n)\n"
  },
  {
    "path": "R/01_biz_if-else.R",
    "content": "x <- 1:5\n#x <- c(TRUE, FALSE, FALSE, TRUE, FALSE)\n\ncat(\n  \"The bizarro version of x is\",\n  if (is.numeric(x)) {\n    -x\n  } else {\n    !x\n  },\n  \"\\n\"\n)\n"
  },
  {
    "path": "R/02_biz_function.R",
    "content": "bizarro <- function(x) {\n  if (is.numeric(x)) {\n    -x\n  } else {\n    !x\n  }\n}\n\nbizarro(1:5)\n\nbizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))\n"
  },
  {
    "path": "R/03_biz_complicated-conditions.R",
    "content": "bizarro <- function(x) {\n  if (class(x)[[1]] == \"numeric\" || class(x)[[1]] == \"integer\") {\n    -x\n  } else if (class(x)[[1]] == \"logical\") {\n    !x\n  } else {\n    stop(\n      \"Don't know how to make bizarro <\", class(x)[[1]], \">\",\n      call. = FALSE)\n  }\n}\n\nbizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))\n\nbizarro(1:5)\n\nbizarro(c(\"abc\", \"def\"))\n"
  },
  {
    "path": "R/04_biz_simplify-conditions.R",
    "content": "bizarro <- function(x) {\n  if (is.numeric(x)) {\n    -x\n  } else if (is.logical(x)) {\n    !x\n  } else {\n    stop(\n      \"Don't know how to make bizarro <\", class(x)[[1]], \">\",\n      call. = FALSE)\n  }\n}\n\nbizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))\n\nbizarro(1:5)\n\nbizarro(c(\"abc\", \"def\"))\n"
  },
  {
    "path": "R/05_googledrive_is-parental.R",
    "content": "## https://github.com/tidyverse/googledrive/blob/b6e97999d0781c31c69ef6fa93d1091a04b44d20/R/drive_cp.R#L67\n\ndrive_cp <- function(file, ...) {\n  file <- as_dribble(file)\n  file <- confirm_single_file(file)\n  if (is_parental(file)) {\n    stop_glue(\"The Drive API does not copy folders or Team Drives.\")\n  }\n  ...\n}\n\n## https://github.com/tidyverse/googledrive/blob/b6e97999d0781c31c69ef6fa93d1091a04b44d20/R/dribble.R#L253\n\nis_parental <- function(d) {\n  stopifnot(inherits(d, \"dribble\"))\n  kind <- purrr::map_chr(d$drive_resource, \"kind\")\n  mime_type <- purrr::map_chr(d$drive_resource, \"mimeType\", .default = NA)\n  kind == \"drive#teamDrive\" | mime_type == \"application/vnd.google-apps.folder\"\n}\n"
  },
  {
    "path": "R/06_biz_stopifnot.R",
    "content": "bizarro <- function(x) {\n  stopifnot(is.numeric(x) || is.logical(x))\n  \n  if (is.numeric(x)) {\n    -x\n  } else {\n    !x\n  }\n}\n\nbizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))\n\nbizarro(1:5)\n\nbizarro(c(\"abc\", \"def\"))\n"
  },
  {
    "path": "R/07_get-some-data_before.R",
    "content": "get_some_data <- function(config, outfile) {\n  if (config_ok(config)) {\n    if (can_write(outfile)) {\n      if (can_open_network_connection(config)) {\n        data <- parse_something_from_network()\n        if(makes_sense(data)) {\n          data <- beautify(data)\n          write_it(data, outfile)\n          return(TRUE)\n        } else {\n          return(FALSE)\n        }\n      } else {\n        stop(\"Can't access network\")\n      }\n    } else {\n      ## uhm. What was this else for again?\n    }\n  } else {\n    ## maybe, some bad news about ... the config? \n  }\n}\n"
  },
  {
    "path": "R/08_get-some-data_after.R",
    "content": "get_some_data <- function(config, outfile) {\n  if (config_bad(config)) {\n    stop(\"Bad config\")\n  }\n  \n  if (!can_write(outfile)) {\n    stop(\"Can't write outfile\")\n  }\n  \n  if (!can_open_network_connection(config)) {\n    stop(\"Can't access network\")\n  }\n  \n  data <- parse_something_from_network()\n  if(!makes_sense(data)) {\n    return(FALSE)\n  }\n  \n  data <- beautify(data)\n  write_it(data, outfile)\n  TRUE\n}\n"
  },
  {
    "path": "R/09_googledrive_process-response.R",
    "content": "## https://github.com/tidyverse/googledrive/blob/94c8c01cbb9fd96fe59920cf58e39263a3135337/R/process_response.R#L8\n\nprocess_response <- function(res) {\n  if (httr::status_code(res) == 204) {\n    return(TRUE)\n  }\n  \n  if (httr::status_code(res) >= 200 && httr::status_code(res) < 300) {\n    return(res %>%\n             stop_for_content_type() %>%\n             httr::content(as = \"parsed\", type = \"application/json\"))\n  }\n  \n  ## 20+ more lines of error handling ...\n}"
  },
  {
    "path": "R/10_biz_if-else-apalooza.R",
    "content": "str_reverse <- function(x) {\n  vapply(\n    strsplit(x, \"\"),\n    FUN = function(z) paste(rev(z), collapse = \"\"),\n    FUN.VALUE = \"\")\n}\nstr_reverse(c(\"abc\", \"def\"))\n\nbizarro <- function(x) {\n  if (is.numeric(x)) {\n    -x\n  } else if (is.logical(x)) {\n    !x\n  } else if (is.character(x)) {\n    str_reverse(x)\n  } else if (is.factor(x)) {\n    levels(x) <- rev(levels(x))\n    x\n  } else { \n    stop(\n      \"Don't know how to make bizarro <\", class(x)[[1]], \">\",\n      call. = FALSE)\n  }\n}\n\nbizarro(1:5)\n\nbizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))\n\nbizarro(c(\"abc\", \"def\"))\n\n(m <- factor(month.abb, levels = month.abb))\nbizarro(factor(m))\n\nbizarro(iris)\n"
  },
  {
    "path": "R/11_biz_S3.R",
    "content": "## snippets on slides created with lots of selective reprex()ing and toggling of\n## the chunk options\n\n#+ include = FALSE\nstr_reverse <- function(x) {\n  vapply(\n    strsplit(x, \"\"),\n    FUN = function(z) paste(rev(z), collapse = \"\"),\n    FUN.VALUE = \"\")\n}\n\n#+ include = FALSE\nbizarro <- function(x) {\n  UseMethod(\"bizarro\")\n}\n\nbizarro.default <- function(x) {\n  stop(\n    \"Don't know how to make bizarro <\",\n    class(x)[[1]], \">\",\n    call. = FALSE\n  )\n}\n\n#+ include = FALSE, eval = FALSE\nbizarro(1:5)\n\nbizarro(TRUE)\n\nbizarro(\"abc\")\n\n#+ include = FALSE\nbizarro.numeric <- function(x) -x\n\nbizarro.logical <- function(x) !x\n\nbizarro.character <- function(x) str_reverse(x)\n\nbizarro.factor <- function(x) {\n  levels(x) <- rev(levels(x))\n  x\n}\n\nbizarro.data.frame <- function(x) {\n  names(x) <- bizarro(names(x))\n  x[] <- lapply(x, bizarro)\n  x\n}\n\n#+ include = TRUE\nbizarro(1:5)\n\nbizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))\n\nbizarro(c(\"abc\", \"def\"))\n\n(m <- factor(month.abb[1:3], levels = month.abb[1:3]))\nbizarro(m)\n\nbizarro(head(iris, 3))\n"
  },
  {
    "path": "R/12_biz_switch.R",
    "content": "#+ include = FALSE\nstr_reverse <- function(x) {\n  vapply(\n    strsplit(x, \"\"),\n    FUN = function(z) paste(rev(z), collapse = \"\"),\n    FUN.VALUE = \"\")\n}\n\n#+ include = TRUE\nbizarro <- function(x) {\n  cls <- class(x)[[1]] ## not a great idea, in general\n  switch(\n    cls,\n    logical = !x,\n    integer = ,\n    numeric = -x,\n    character = str_reverse(x),\n    stop(\"Don't know how to make bizarro <\", cls, \">\", call. = FALSE)\n  )\n}\n\nbizarro(1:5)\n\nbizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))\n\nbizarro(c(\"abc\", \"def\"))\n\nbizarro(iris)\n"
  },
  {
    "path": "R/13_stringr_switch.R",
    "content": "## https://github.com/tidyverse/stringr/blob/e775974d2822ae0de90ab33cf9f02273c22a3801/R/pad.r#L30\n\nstr_pad <- function(string,\n                    width,\n                    side = c(\"left\", \"right\", \"both\"),\n                    pad = \" \") {\n  side <- match.arg(side)\n\n  switch(\n    side,\n    left  =  stri_pad_left(string, width, pad = pad),\n    right = stri_pad_right(string, width, pad = pad),\n    both  =  stri_pad_both(string, width, pad = pad)\n  )\n}\n"
  },
  {
    "path": "R/14_age_case-when.R",
    "content": "library(tidyverse)\n\ntibble(\n  age_yrs = c(0, 4, 10, 15, 24, 55),\n  age_cat = case_when(\n    age_yrs < 2  ~ \"baby\",\n    age_yrs < 13 ~ \"kid\",\n    age_yrs < 20 ~ \"teen\",\n    TRUE         ~ \"adult\"\n  )\n)\n\nage_yrs <- c(0, 8, 15, 24, 55)\n\ncat(\nifelse(age_yrs < 2, \"baby\",\n       ifelse(age_yrs < 13, \"kid\",\n              ifelse(age_yrs < 20, \"teen\",\n                     \"adult\"\n              )\n       )\n)\n,sep=\"\\n\")\n\n\n"
  },
  {
    "path": "R/15_devtools_%||%.R",
    "content": "## https://github.com/r-lib/devtools/blob/b01edfbfa1fd0e3965a24188805c5e55f0d7376f/R/build-manual.R\n\n#' Create package pdf manual\n#'\n#' @param pkg package path or name\n#' @param path path in which to produce package manual\nbuild_manual <- function(pkg = \".\", path = NULL) {\n  pkg <- as.package(pkg)\n  path <- path %||% dirname(pkg$path)\n  ...\n}\n\n## https://github.com/r-lib/devtools/blob/bd3bdf15b8f2e5e07d750de4360df28090a9f117/R/install-github.r#L73-L74\ngithub_remote <- function(repo, username = NULL, ...) {\n  meta <- parse_git_repo(repo)\n  ...\n  meta$username <- username %||%\n    getOption(\"github.user\") %||%\n    stop(\"Unknown username.\")\n  ...\n}\n\nif (is.null(path)) {\n  path <- dirname(pkg$path)\n}\n\n`%||%` <- function(x, y) {\n  if (is_null(x)) y else x\n}\n\nf <- function(x, y = TRUE, z = NULL) {\n  ...\n  if (is.null(z)) {\n    z <- much_logic(other, stuff, ...)\n  }\n  ...\n}\n\nf <- function(x, y = TRUE, z = NULL) {\n  ...\n  z <- z %||% much_logic(other, stuff, ...)\n  ...\n}\n"
  },
  {
    "path": "README.md",
    "content": "# Code Smells and Feels\n\nTalk initially prepared for [useR!2018](https://user2018.r-project.org) Brisbane. Also delivered elsewhere, such as the [First Mexican Statistical Association School in Data Science](https://amestad.mx/escuela/1/)  \nby Jenny Bryan  \n[jennybryan.org](https://jennybryan.org)  \nTwitter: [@jennyBryan](https://twitter.com/jennyBryan/)  \nGitHub: [@jennybc](https://github.com/jennybc)  \n\n> \"Code smell\" is an evocative term for that vague feeling of unease we get when reading certain bits of code. It's not necessarily wrong, but neither is it obviously correct. We may be reluctant to work on such code, because past experience suggests it's going to be fiddly and bug-prone. In contrast, there's another type of code that just feels good to read and work on. What's the difference? If we can be more precise about code smells and feels, we can be intentional about writing code that is easier and more pleasant to work on. I've been fortunate to spend the last couple years embedded in a group of developers working on the tidyverse and r-lib packages. 
Based on this experience, I'll talk about specific code smells and deodorizing strategies for R.\n\n## Link to this repo\n\n[rstd.io/code-smells](https://rstd.io/code-smells) is a shortlink to HERE\n\n## Slides\n\n<a href=\"https://speakerdeck.com/jennybc/code-smells-and-feels\"><img src=\"2018-07_user-brisbane-400.jpeg\"></a>\n\nSlides [on SpeakerDeck](https://speakerdeck.com/jennybc/code-smells-and-feels)\n\nSlides [as PDF file](2018-07_user-brisbane-bryan.pdf) here in this repo\n\n## Video\n\nVideo is available on YouTube:  \n<https://www.youtube.com/watch?v=7oyiPBjLAWY>\n\n## Credits and resources\n\nAnnotated and hyperlink-y list of resources mentioned in the slides, in roughly the same order.\n\n---\n\nDo useRs have less formal training in CS/programming than others writing code?\n\n2018 Stack Overflow Annual Developer Survey: <https://insights.stackoverflow.com/survey>\n\nAdapted from original code by [Julia Silge](https://juliasilge.com), data scientist at  Stack Overflow.\n\nCode here in this repo: [stackoverflow-survey](stackoverflow-survey)\n\n---\n\nTalks about programming style, workflow, and policies\n\nSome that inspired me:\n\nGood Programming Practice, UseR! 2004 Keynote, Martin Mächler  \nhttp://www.ci.tuwien.ac.at/Conferences/useR-2004/  \n\nWhat I find important when R Programming and Recent Cool Features in R  \n2018 eRum Keynote by Martin Mächler and R Core Team  \nhttp://stat.ethz.ch/~maechler/U/R/eRum_2018_ProgR-ALTREP.html  \n\nMy own efforts in this genre:\n\n[Zen And The aRt Of Workflow Maintenance](https://speakerdeck.com/jennybc/zen-and-the-art-of-workflow-maintenance), IASC/NZSA 2017, Jenny Bryan\n\n[Workflow: You should have one](https://speakerdeck.com/jennybc/workflow-you-should-have-one), EARL London 2017, Jenny Bryan\n\n---\n\nCakes that look like hedgehogs ... 
sort of?\n\n * Beautiful hedgehog cake: [BBC goodfood recipe](https://www.bbcgoodfood.com/recipes/hedgehog-cake)\n  * Homely hedgehog cake: [Reddit thread](https://www.reddit.com/r/funny/comments/1am3x7/so_a_friend_of_my_girlfriend_made_a_cake_for_her/), <http://i.imgur.com/peilfAh.jpg> \n  * Photos originally found at <https://www.boredpanda.com/funny-cake-fails-expectations-reality/>\n  \n---\n\n[What Every Successful Person Knows, But Never Says](https://jamesclear.com/ira-glass-failure)  \nJames Clear blog post that discusses an Ira Glass interview. Indicative quote:\n\n> All of us who do creative work, we get into it because we have good taste. But it's like there is this gap. For the first couple years that you're making stuff, what you're making isn't so good. It’s not that great. It’s trying to be good, it has ambition to be good, but it’s not that good.\n>\n> But your taste, the thing that got you into the game, is still killer. And your taste is good enough that you can tell that what you're making is kind of a disappointment to you.\n\n---\n\nThe teams that bring you\n\n  * tidyverse packages: [org members](https://github.com/orgs/tidyverse/people) and [outside collaborators](https://github.com/orgs/tidyverse/outside-collaborators)\n  * r-lib packages: [org members](https://github.com/orgs/r-lib/people) and [outside collaborators](https://github.com/orgs/r-lib/outside-collaborators)\n  \n---\n\nRefactoring  \nImproving the Design of Existing Code  \nby Martin Fowler  \n(with Kent Beck, John Brant, William Opdyke, and Don Roberts)  \nhttps://martinfowler.com/books/refactoring.html\n\nDumpster photo by NeONBRAND  \nhttps://unsplash.com/photos/8Yk4T-tDSYY\n\n---\n\nCode Smells – a Short List  \nblog post by Arne Mertz  \nhttps://arne-mertz.de/2017/08/code-smells-short-list/\n\n---\n\nbizarro: all code snippets are given here in [R/](R)\n\nBeach + glass orb photo by Perchek Industrie  \nhttps://unsplash.com/photos/y-rmmZZfD1I\n\n---\n\nGood enough practices 
in scientific computing  \nWilson G, Bryan J, Cranston K, Kitzes J, Nederbragt L, et al. (2017) Good enough practices in scientific computing. PLOS Computational Biology 13(6): e1005510. <https://doi.org/10.1371/journal.pcbi.1005510>\n\n> Do not comment and uncomment sections of code to control a program's behavior.\n\n---\n\n`if() else()` described in breathless AI style:  \nyou mean a one layer neural network with identity activation and no hidden layers\n\n[Tweet](https://twitter.com/F_Vaggi/status/1011127587639197696) by [Federico Vaggi](https://twitter.com/F_Vaggi)\n\n---\n\nReturn early and clearly  \nBlog post by Arne Mertz  \nhttps://arne-mertz.de/2016/12/early-return/  \n\"Handling preconditions\" section is the basis of my early return before/after example `get_some_data()`.\n\nMore posts and conversations about early returns and avoiding if entirely:\n\n  * [Avoid Else, Return Early](http://blog.timoxley.com/post/47041269194/avoid-else-return-early), blog post by Tim Oxley\n  * [Anti-If: The missing patterns](https://code.joejag.com/2016/anti-if-the-missing-patterns.html), blog post by Joe Wright\n  * Recent discussion on Hacker News: <https://news.ycombinator.com/item?id=17408836>\n\nYoda photo by Kory Westerhold on flickr  \nhttps://www.flickr.com/photos/korymatthew/14211839966\n\n---\n\nBaby with diaper photo by rawpixel  \nhttps://unsplash.com/photos/6RjllGKO88U\n\n---\n\nIn addition to the **Refactoring** book referenced above, these are other good reads for improving your code:\n\nThe Art of Readable Code  \nSimple and Practical Techniques for Writing Better Code  \nDustin Boswell, Trevor Foucher  \nhttp://shop.oreilly.com/product/9780596802301.do\n\nThe Pragmatic Programmer  \nFrom Journeyman to Master  \nby Andrew Hunt and David Thomas  \nhttps://pragprog.com/book/tpp/the-pragmatic-programmer\n\n---\n\nUpgrade your cargo cult for the win  \nhttps://meaningness.com/metablog/upgrade-your-cargo-cult\n\nToddler on run bike photo by Jordan Sanchez  
\nhttps://unsplash.com/photos/Vbzx-yy5FoA\n"
  },
  {
    "path": "code-smells-and-feels.Rproj",
    "content": "Version: 1.0\n\nRestoreWorkspace: No\nSaveWorkspace: No\nAlwaysSaveHistory: Default\n\nEnableCodeIndexing: Yes\nUseSpacesForTab: Yes\nNumSpacesForTab: 2\nEncoding: UTF-8\n\nRnwWeave: Sweave\nLaTeX: pdfLaTeX\n\nAutoAppendNewline: Yes\nStripTrailingWhitespace: Yes\n\nBuildType: Package\nPackageUseDevtools: Yes\nPackageInstallArgs: --no-multiarch --with-keep.source\nPackageRoxygenize: rd,collate,namespace\n"
  },
  {
    "path": "resources/code-smells.txt",
    "content": "Duplicated Code\nLong Method\nLarge Class\nLong Parameter List\nDivergent Change\nShotgun Surgery\nFeature Envy\nData Clumps\nPrimitive Obsession\nSwitch Statements\nParallel Inheritance Hierarchies\nLazy Class\nSpeculative Generality\nTemporary Field\nMessage Chains\nMiddle Man\nInappropriate Intimacy\nAlternative Classes with Different Interfaces\nIncomplete Library Class\nData Class\nRefused Bequest\nComments"
  },
  {
    "path": "stackoverflow-survey/README.Rmd",
    "content": "---\noutput: github_document\n---\n\n<!-- README.md is generated from README.Rmd. Please edit that file -->\n\n```{r setup, include = FALSE}\nknitr::opts_chunk$set(\n  collapse = TRUE,\n  comment = \"#>\"\n)\n```\n\n# Stack Overflow survey analysis\n\nLook at evidence for formal training in CS/programming among those who use R.\n\nUse 2018 Stack Overflow Annual Developer Survey: <https://insights.stackoverflow.com/survey>\n\nOriginal code from [Julia Silge](https://juliasilge.com), data scientist at  Stack Overflow. Modified by Jenny Bryan.\n\n## Analysis\n\nLoad packages.\n\n```{r}\nlibrary(here)\nlibrary(tidyverse)\nlibrary(scales)\n```\n\nMake sure we have the data.\n\n```{r download-data}\nsurvey_path <- here(\n  \"stackoverflow-survey/developer_survey_2018/survey_results_public.csv\"\n)\nif (!file.exists(survey_path)) {\n  ## consults Content-Description to get filename\n  dl <- usethis:::download_zip(\n    url = \"https://drive.google.com/uc?export=download&id=1_9On2-nsBQIw3JiY43sWbrF8EjrqrR4U\",\n    destdir = here(\"stackoverflow-survey\")\n  )\n  target <- here(\n    \"stackoverflow-survey\",\n    tools::file_path_sans_ext(basename(dl))\n  )\n  utils::unzip(dl, exdir = target)\n  usethis::use_git_ignore(basename(target))\n  unlink(dl)\n}\n```\n\nLoad the data.\n\n```{r survey2018}\ntheme_set(theme_minimal(base_family=\"Source Sans Pro\"))\nsurvey2018 <- read_csv(survey_path)\n```\n\nWhat kinds of majors do R users have?\n\n```{r wrangle}\nusers_majors <- survey2018 %>%\n  select(Respondent, LanguageWorkedWith, UndergradMajor) %>%\n  filter(!is.na(UndergradMajor)) %>%\n  mutate(LanguageWorkedWith = str_split(LanguageWorkedWith, pattern = \";\")) %>%\n  unnest(LanguageWorkedWith) %>%\n  group_by(Respondent) %>%\n  summarize(UsesR = \"R\" %in% LanguageWorkedWith,\n            UndergradMajor = first(UndergradMajor))\n\ncounts_major <- users_majors %>%\n  count(UsesR, UndergradMajor) %>%\n  mutate(UsesR = if_else(UsesR, \"useR\", \"Other\")) 
%>%\n  spread(UsesR, n, fill = 0)\n\nlogratio_major <- counts_major %>%\n  mutate_if(is.numeric, funs((. + 1) / sum(. + 1))) %>%\n  mutate(logratio = log2(useR / Other)) %>%\n  arrange(desc(logratio)) %>%\n  mutate(\n    UndergradMajor = reorder(UndergradMajor, logratio),\n    Direction = factor(if_else(logratio > 0, \"useRs\", \"Other\")),\n    Direction = forcats::fct_reorder(Direction, logratio, .desc = TRUE)\n  )\n```\n\n```{r reveal-data}\nknitr::kable(counts_major)\nsum(counts_major$Other)\nsum(counts_major$useR)\nknitr::kable(logratio_major)\n```\n\n```{r major-barchart, fig.height = 6, fig.width = 11, dpi = 300}\np <- logratio_major %>% \n  group_by(Direction) %>% \n  ggplot(aes(UndergradMajor, logratio, fill = Direction)) +\n  geom_col(alpha = 0.9) +\n  coord_flip() +\n  scale_y_continuous(breaks = seq(-2, 2),\n                     labels = c(\"0.25x\", \"0.5x\", \"Same\", \"2x\", \"4x\"))\n\n## Julia's original\np +\n  labs(y = \"Relatively more from R users\", x = NULL,\n       fill = \"More likely from...\",\n       subtitle = \"R users are less likely to have formal programming training\",\n       title = \"What kinds of undergrad majors do R users have?\")       \n\n## For use in Keynote\np +\n  labs(y = \"Relative prevalence\", x = NULL,\n       fill = \"Major is more common among\",\n       caption = \"Julia Silge & Jenny Bryan\\nSource: 2018 Stack Overflow Annual Developer Survey\") +\n  theme(\n    legend.position = \"top\",\n    legend.title = element_text(size = rel(1.4)),\n    axis.text.y = element_text(size = rel(1.3))\n  )\n```\n\n"
  },
  {
    "path": "stackoverflow-survey/README.md",
    "content": "\n<!-- README.md is generated from README.Rmd. Please edit that file -->\n\n# Stack Overflow survey analysis\n\nLook at evidence for formal training in CS/programming among those who\nuse R.\n\nUse 2018 Stack Overflow Annual Developer Survey:\n<https://insights.stackoverflow.com/survey>\n\nOriginal code from [Julia Silge](https://juliasilge.com), data scientist\nat Stack Overflow. Modified by Jenny Bryan.\n\n## Analysis\n\nLoad packages.\n\n``` r\nlibrary(here)\n#> here() starts at /Users/jenny/talks/2018-07_user-brisbane\nlibrary(tidyverse)\n#> ── Attaching packages ──────────────────────────────────────── tidyverse 1.2.1 ──\n#> ✔ ggplot2 2.2.1     ✔ purrr   0.2.5\n#> ✔ tibble  1.4.2     ✔ dplyr   0.7.6\n#> ✔ tidyr   0.8.1     ✔ stringr 1.3.1\n#> ✔ readr   1.1.1     ✔ forcats 0.3.0\n#> ── Conflicts ─────────────────────────────────────────── tidyverse_conflicts() ──\n#> ✖ dplyr::filter() masks stats::filter()\n#> ✖ dplyr::lag()    masks stats::lag()\nlibrary(scales)\n#> \n#> Attaching package: 'scales'\n#> The following object is masked from 'package:purrr':\n#> \n#>     discard\n#> The following object is masked from 'package:readr':\n#> \n#>     col_factor\n```\n\nMake sure we have the data.\n\n``` r\nsurvey_path <- here(\n  \"stackoverflow-survey/developer_survey_2018/survey_results_public.csv\"\n)\nif (!file.exists(survey_path)) {\n  ## consults Content-Description to get filename\n  dl <- usethis:::download_zip(\n    url = \"https://drive.google.com/uc?export=download&id=1_9On2-nsBQIw3JiY43sWbrF8EjrqrR4U\",\n    destdir = here(\"stackoverflow-survey\")\n  )\n  target <- here(\n    \"stackoverflow-survey\",\n    tools::file_path_sans_ext(basename(dl))\n  )\n  utils::unzip(dl, exdir = target)\n  usethis::use_git_ignore(basename(target))\n  unlink(dl)\n}\n```\n\nLoad the data.\n\n``` r\ntheme_set(theme_minimal(base_family=\"Source Sans Pro\"))\nsurvey2018 <- read_csv(survey_path)\n#> Parsed with column specification:\n#> cols(\n#>   .default = 
col_character(),\n#>   Respondent = col_integer(),\n#>   AssessJob1 = col_integer(),\n#>   AssessJob2 = col_integer(),\n#>   AssessJob3 = col_integer(),\n#>   AssessJob4 = col_integer(),\n#>   AssessJob5 = col_integer(),\n#>   AssessJob6 = col_integer(),\n#>   AssessJob7 = col_integer(),\n#>   AssessJob8 = col_integer(),\n#>   AssessJob9 = col_integer(),\n#>   AssessJob10 = col_integer(),\n#>   AssessBenefits1 = col_integer(),\n#>   AssessBenefits2 = col_integer(),\n#>   AssessBenefits3 = col_integer(),\n#>   AssessBenefits4 = col_integer(),\n#>   AssessBenefits5 = col_integer(),\n#>   AssessBenefits6 = col_integer(),\n#>   AssessBenefits7 = col_integer(),\n#>   AssessBenefits8 = col_integer(),\n#>   AssessBenefits9 = col_integer()\n#>   # ... with 23 more columns\n#> )\n#> See spec(...) for full column specifications.\n```\n\nWhat kinds of majors do R users have?\n\n``` r\nusers_majors <- survey2018 %>%\n  select(Respondent, LanguageWorkedWith, UndergradMajor) %>%\n  filter(!is.na(UndergradMajor)) %>%\n  mutate(LanguageWorkedWith = str_split(LanguageWorkedWith, pattern = \";\")) %>%\n  unnest(LanguageWorkedWith) %>%\n  group_by(Respondent) %>%\n  summarize(UsesR = \"R\" %in% LanguageWorkedWith,\n            UndergradMajor = first(UndergradMajor))\n\ncounts_major <- users_majors %>%\n  count(UsesR, UndergradMajor) %>%\n  mutate(UsesR = if_else(UsesR, \"useR\", \"Other\")) %>%\n  spread(UsesR, n, fill = 0)\n\nlogratio_major <- counts_major %>%\n  mutate_if(is.numeric, funs((. + 1) / sum(. 
+ 1))) %>%\n  mutate(logratio = log2(useR / Other)) %>%\n  arrange(desc(logratio)) %>%\n  mutate(\n    UndergradMajor = reorder(UndergradMajor, logratio),\n    Direction = factor(if_else(logratio > 0, \"useRs\", \"Other\")),\n    Direction = forcats::fct_reorder(Direction, logratio, .desc = TRUE)\n  )\n```\n\n``` r\nknitr::kable(counts_major)\n```\n\n| UndergradMajor                                                        | Other | useR |\n| :-------------------------------------------------------------------- | ----: | ---: |\n| A business discipline (ex. accounting, finance, marketing)            |  1750 |  171 |\n| A health science (ex. nursing, pharmacy, radiology)                   |   217 |   29 |\n| A humanities discipline (ex. literature, history, philosophy)         |  1487 |  103 |\n| A natural science (ex. biology, chemistry, physics)                   |  2561 |  489 |\n| A social science (ex. anthropology, psychology, political science)    |  1122 |  255 |\n| Another engineering discipline (ex. civil, electrical, mechanical)    |  6575 |  370 |\n| Computer science, computer engineering, or software engineering       | 48340 | 1996 |\n| Fine arts or performing arts (ex. 
graphic design, music, studio art)  |  1105 |   30 |\n| I never declared a major                                              |   677 |   16 |\n| Information systems, information technology, or system administration |  6307 |  200 |\n| Mathematics or statistics                                             |  2236 |  582 |\n| Web development or web design                                         |  2397 |   21 |\n\n``` r\nsum(counts_major$Other)\n#> [1] 74774\nsum(counts_major$useR)\n#> [1] 4262\nknitr::kable(logratio_major)\n```\n\n| UndergradMajor                                                        |     Other |      useR |    logratio | Direction |\n| :-------------------------------------------------------------------- | --------: | --------: | ----------: | :-------- |\n| Mathematics or statistics                                             | 0.0299120 | 0.1364062 |   2.1891119 | useRs     |\n| A social science (ex. anthropology, psychology, political science)    | 0.0150162 | 0.0598971 |   1.9959672 | useRs     |\n| A natural science (ex. biology, chemistry, physics)                   | 0.0342577 | 0.1146467 |   1.7426926 | useRs     |\n| A health science (ex. nursing, pharmacy, radiology)                   | 0.0029150 | 0.0070192 |   1.2678157 | useRs     |\n| A business discipline (ex. accounting, finance, marketing)            | 0.0234135 | 0.0402433 |   0.7814108 | useRs     |\n| A humanities discipline (ex. literature, history, philosophy)         | 0.0198968 | 0.0243332 |   0.2903903 | useRs     |\n| Another engineering discipline (ex. civil, electrical, mechanical)    | 0.0879309 | 0.0868039 | \\-0.0186098 | Other     |\n| Computer science, computer engineering, or software engineering       | 0.6463910 | 0.4672438 | \\-0.4682317 | Other     |\n| Information systems, information technology, or system administration | 0.0843473 | 0.0470285 | \\-0.8428058 | Other     |\n| Fine arts or performing arts (ex. 
graphic design, music, studio art)  | 0.0147889 | 0.0072532 | \\-1.0278300 | Other     |\n| I never declared a major                                              | 0.0090659 | 0.0039775 | \\-1.1885692 | Other     |\n| Web development or web design                                         | 0.0320648 | 0.0051474 | \\-2.6390749 | Other     |\n\n``` r\np <- logratio_major %>% \n  group_by(Direction) %>% \n  ggplot(aes(UndergradMajor, logratio, fill = Direction)) +\n  geom_col(alpha = 0.9) +\n  coord_flip() +\n  scale_y_continuous(breaks = seq(-2, 2),\n                     labels = c(\"0.25x\", \"0.5x\", \"Same\", \"2x\", \"4x\"))\n\n## Julia's original\np +\n  labs(y = \"Relatively more from R users\", x = NULL,\n       fill = \"More likely from...\",\n       subtitle = \"R users are less likely to have formal programming training\",\n       title = \"What kinds of undergrad majors do R users have?\")       \n```\n\n![](README_files/figure-gfm/major-barchart-1.png)<!-- -->\n\n``` r\n\n## For use in Keynote\np +\n  labs(y = \"Relative prevalence\", x = NULL,\n       fill = \"Major is more common among\",\n       caption = \"Julia Silge & Jenny Bryan\\nSource: 2018 Stack Overflow Annual Developer Survey\") +\n  theme(\n    legend.position = \"top\",\n    legend.title = element_text(size = rel(1.4)),\n    axis.text.y = element_text(size = rel(1.3))\n  )\n```\n\n![](README_files/figure-gfm/major-barchart-2.png)<!-- -->\n"
  }
]