master 25e1a11272d7 cached
22 files
25.7 KB
8.3k tokens
1 requests
Download .txt
Repository: jennybc/code-smells-and-feels
Branch: master
Commit: 25e1a11272d7
Files: 22
Total size: 25.7 KB

Directory structure:
gitextract_l_5jxohu/

├── .gitignore
├── R/
│   ├── 00_biz_comment-decomment.R
│   ├── 01_biz_if-else.R
│   ├── 02_biz_function.R
│   ├── 03_biz_complicated-conditions.R
│   ├── 04_biz_simplify-conditions.R
│   ├── 05_googledrive_is-parental.R
│   ├── 06_biz_stopifnot.R
│   ├── 07_get-some-data_before.R
│   ├── 08_get-some-data_after.R
│   ├── 09_googledrive_process-response.R
│   ├── 10_biz_if-else-apalooza.R
│   ├── 11_biz_S3.R
│   ├── 12_biz_switch.R
│   ├── 13_stringr_switch.R
│   ├── 14_age_case-when.R
│   └── 15_devtools_%||%.R
├── README.md
├── code-smells-and-feels.Rproj
├── resources/
│   └── code-smells.txt
└── stackoverflow-survey/
    ├── README.Rmd
    └── README.md

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.Rhistory
.RData
.Rproj.user
developer_survey_2018
Refactoring_improving_the_design_of_existing_code.pdf
keynote*


================================================
FILE: R/00_biz_comment-decomment.R
================================================
## Exhibit for the "comment / decomment" habit: behavior is switched by hand.
## As written, `x` is an integer vector and cat() prints its arithmetic
## negation. The commented-out lines are the alternative for a logical `x`:
## to use them, both `#` lines must be uncommented and their numeric
## counterparts commented out.
x <- 1:5
#x <- c(TRUE, FALSE, FALSE, TRUE, FALSE)

cat(
  "The bizarro version of x is",
  -x,
  #!x,
  "\n"
)


================================================
FILE: R/01_biz_if-else.R
================================================
## Input to flip; swap in the commented line to try the logical branch.
x <- 1:5
#x <- c(TRUE, FALSE, FALSE, TRUE, FALSE)

## The type check picks the operator, so no lines need to be toggled by hand:
## numeric input is negated arithmetically, anything else logically.
cat("The bizarro version of x is", if (!is.numeric(x)) !x else -x, "\n")


================================================
FILE: R/02_biz_function.R
================================================
## Return the "bizarro" version of `x`.
##
## x: a numeric (integer or double) or logical vector.
## Returns `-x` for numeric input; every other input is passed to `!`.
bizarro <- function(x) {
  if (!is.numeric(x)) {
    return(!x)
  }
  -x
}

bizarro(1:5)

bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))


================================================
FILE: R/03_biz_complicated-conditions.R
================================================
## Return the "bizarro" version of `x`, choosing the operation from the first
## element of class(x).
##
## x: an object whose first class is "numeric", "integer" or "logical".
## Returns `-x` for "numeric"/"integer" and `!x` for "logical".
## Errors (without the call in the message) for any other first class.
##
## The conditions are spelled out as class-string comparisons on purpose:
## this script is the "complicated conditions" example.
bizarro <- function(x) {
  if (class(x)[[1]] == "numeric" || class(x)[[1]] == "integer") {
    -x
  } else if (class(x)[[1]] == "logical") {
    !x
  } else {
    ## Message names the function correctly ("bizarro", not "bizzaro")
    stop(
      "Don't know how to make bizarro <", class(x)[[1]], ">",
      call. = FALSE
    )
  }
}

bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))

bizarro(1:5)

bizarro(c("abc", "def"))


================================================
FILE: R/04_biz_simplify-conditions.R
================================================
## Return the "bizarro" version of `x`.
##
## x: a numeric (integer or double) or logical vector.
## Returns `-x` for numeric input and `!x` for logical input.
## Errors (without the call in the message) for anything else, reporting the
## first element of class(x).
bizarro <- function(x) {
  if (is.numeric(x)) {
    -x
  } else if (is.logical(x)) {
    !x
  } else {
    ## Message names the function correctly ("bizarro", not "bizzaro")
    stop(
      "Don't know how to make bizarro <", class(x)[[1]], ">",
      call. = FALSE
    )
  }
}

bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))

bizarro(1:5)

bizarro(c("abc", "def"))


================================================
FILE: R/05_googledrive_is-parental.R
================================================
## https://github.com/tidyverse/googledrive/blob/b6e97999d0781c31c69ef6fa93d1091a04b44d20/R/drive_cp.R#L67

## Excerpt of drive_cp(); the trailing bare `...` appears to stand in for the
## part of the body that is not reproduced here.
drive_cp <- function(file, ...) {
  file <- as_dribble(file)
  file <- confirm_single_file(file)
  ## Guard clause: fail before the rest of the body runs when is_parental()
  ## flags the input
  if (is_parental(file)) {
    stop_glue("The Drive API does not copy folders or Team Drives.")
  }
  ...
}

## https://github.com/tidyverse/googledrive/blob/b6e97999d0781c31c69ef6fa93d1091a04b44d20/R/dribble.R#L253

## Flag the entries of a dribble that are Team Drives or folders.
##
## d: an object inheriting from "dribble"; anything else fails the assertion.
## Returns one logical value per element of d$drive_resource.
is_parental <- function(d) {
  stopifnot(inherits(d, "dribble"))
  resources <- d$drive_resource
  is_team_drive <- purrr::map_chr(resources, "kind") == "drive#teamDrive"
  ## Entries without a "mimeType" field yield NA rather than an error
  is_folder <- purrr::map_chr(resources, "mimeType", .default = NA) ==
    "application/vnd.google-apps.folder"
  is_team_drive | is_folder
}


================================================
FILE: R/06_biz_stopifnot.R
================================================
## Return the "bizarro" version of `x`.
##
## x: a numeric or logical vector; any other input fails the stopifnot()
##    assertion up front.
## Returns `!x` for logical input and `-x` for numeric input.
bizarro <- function(x) {
  stopifnot(is.numeric(x) || is.logical(x))

  ## After the assertion, "not logical" can only mean numeric
  if (is.logical(x)) {
    return(!x)
  }
  -x
}

bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))

bizarro(1:5)

bizarro(c("abc", "def"))


================================================
FILE: R/07_get-some-data_before.R
================================================
## "Before" version: every precondition adds a level of nesting, and the
## handling for a failed check sits far below the check itself.
##
## config:  passed to config_ok() and can_open_network_connection().
## outfile: passed to can_write() and write_it().
## Returns TRUE after writing, FALSE when the parsed data fails makes_sense().
## Errors when the network check fails. When the config or outfile check
## fails, the empty else branches are reached and the result is NULL, with
## no error raised.
get_some_data <- function(config, outfile) {
  if (config_ok(config)) {
    if (can_write(outfile)) {
      if (can_open_network_connection(config)) {
        data <- parse_something_from_network()
        if(makes_sense(data)) {
          data <- beautify(data)
          write_it(data, outfile)
          return(TRUE)
        } else {
          return(FALSE)
        }
      } else {
        stop("Can't access network")
      }
    } else {
      ## uhm. What was this else for again?
      ## (pairs with can_write(outfile); nothing happens here)
    }
  } else {
    ## maybe, some bad news about ... the config?
    ## (pairs with config_ok(config); nothing happens here)
  }
}


================================================
FILE: R/08_get-some-data_after.R
================================================
## "After" version: each precondition is a guard clause that exits at once,
## leaving the main path unindented at the bottom.
##
## config:  passed to config_bad() and can_open_network_connection().
## outfile: passed to can_write() and write_it().
## Returns TRUE after writing, FALSE when the parsed data fails makes_sense().
## Errors on a bad config, an unwritable outfile, or no network access.
get_some_data <- function(config, outfile) {
  if (config_bad(config)) stop("Bad config")
  if (!can_write(outfile)) stop("Can't write outfile")
  if (!can_open_network_connection(config)) stop("Can't access network")

  parsed <- parse_something_from_network()
  if (!makes_sense(parsed)) {
    return(FALSE)
  }

  write_it(beautify(parsed), outfile)
  TRUE
}


================================================
FILE: R/09_googledrive_process-response.R
================================================
## https://github.com/tidyverse/googledrive/blob/94c8c01cbb9fd96fe59920cf58e39263a3135337/R/process_response.R#L8

## Turn an HTTP response into a return value, handling the success cases
## first and returning early from each.
##
## res: a response object accepted by httr::status_code() and httr::content().
## Returns TRUE for status 204, or the parsed JSON content for any other 2xx
## status once stop_for_content_type() has accepted the response.
process_response <- function(res) {
  status <- httr::status_code(res)

  if (status == 204) {
    return(TRUE)
  }

  if (status >= 200 && status < 300) {
    checked <- stop_for_content_type(res)
    return(httr::content(checked, as = "parsed", type = "application/json"))
  }

  ## 20+ more lines of error handling ...
}

================================================
FILE: R/10_biz_if-else-apalooza.R
================================================
## Reverse the characters of each string in a character vector.
##
## x: a character vector (strsplit() errors on non-character input).
## Returns a character vector the same length as `x`. Missing strings stay
## NA_character_ and empty strings stay "".
str_reverse <- function(x) {
  reversed <- vapply(
    strsplit(x, ""),
    FUN = function(z) paste(rev(z), collapse = ""),
    FUN.VALUE = ""
  )
  ## paste() turns NA into the literal string "NA"; restore missingness
  reversed[is.na(x)] <- NA_character_
  reversed
}
str_reverse(c("abc", "def"))

## Return the "bizarro" version of `x`, with one if/else branch per type.
##
## x: a numeric, logical or character vector, or a factor.
## Returns `-x` (numeric), `!x` (logical), each string reversed via
## str_reverse() (character), or the factor with its levels vector reversed.
## Errors (without the call in the message) for anything else, reporting the
## first element of class(x).
bizarro <- function(x) {
  if (is.numeric(x)) {
    -x
  } else if (is.logical(x)) {
    !x
  } else if (is.character(x)) {
    str_reverse(x)
  } else if (is.factor(x)) {
    levels(x) <- rev(levels(x))
    x
  } else {
    ## Message names the function correctly ("bizarro", not "bizzaro")
    stop(
      "Don't know how to make bizarro <", class(x)[[1]], ">",
      call. = FALSE
    )
  }
}

bizarro(1:5)

bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))

bizarro(c("abc", "def"))

(m <- factor(month.abb, levels = month.abb))
bizarro(factor(m))

bizarro(iris)


================================================
FILE: R/11_biz_S3.R
================================================
## snippets on slides created with lots of selective reprex()ing and toggling of
## the chunk options

#+ include = FALSE
## Reverse the characters of each string in a character vector.
##
## x: a character vector (strsplit() errors on non-character input).
## Returns a character vector the same length as `x`. Missing strings stay
## NA_character_ and empty strings stay "".
str_reverse <- function(x) {
  reversed <- vapply(
    strsplit(x, ""),
    FUN = function(z) paste(rev(z), collapse = ""),
    FUN.VALUE = ""
  )
  ## paste() turns NA into the literal string "NA"; restore missingness
  reversed[is.na(x)] <- NA_character_
  reversed
}

#+ include = FALSE
## S3 generic: dispatches on the class of `x` to a bizarro.<class>() method.
bizarro <- function(x) UseMethod("bizarro")

## Fallback method, reached when no bizarro method exists for the class of
## `x`. Always errors (without the call in the message), reporting the first
## element of class(x).
bizarro.default <- function(x) {
  ## Message names the function correctly ("bizarro", not "bizzaro")
  stop(
    "Don't know how to make bizarro <",
    class(x)[[1]], ">",
    call. = FALSE
  )
}

#+ include = FALSE, eval = FALSE
bizarro(1:5)

bizarro(TRUE)

bizarro("abc")

#+ include = FALSE
## Numeric method: arithmetic negation.
bizarro.numeric <- function(x) {
  -x
}

## Logical method: logical negation.
bizarro.logical <- function(x) {
  !x
}

## Character method: reverse each string via str_reverse().
bizarro.character <- function(x) {
  str_reverse(x)
}

## Factor method: assign the levels vector back in reverse order. The
## underlying integer codes are untouched, so each value is relabelled.
bizarro.factor <- function(x) {
  flipped_levels <- rev(levels(x))
  levels(x) <- flipped_levels
  x
}

## Data frame method: apply bizarro() to the column names, then to every
## column. Assigning through `x[]` keeps the object a data frame.
bizarro.data.frame <- function(x) {
  names(x) <- bizarro(names(x))
  flipped_cols <- lapply(x, bizarro)
  x[] <- flipped_cols
  x
}

#+ include = TRUE
bizarro(1:5)

bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))

bizarro(c("abc", "def"))

(m <- factor(month.abb[1:3], levels = month.abb[1:3]))
bizarro(m)

bizarro(head(iris, 3))


================================================
FILE: R/12_biz_switch.R
================================================
#+ include = FALSE
## Reverse the characters of each string in a character vector.
##
## x: a character vector (strsplit() errors on non-character input).
## Returns a character vector the same length as `x`. Missing strings stay
## NA_character_ and empty strings stay "".
str_reverse <- function(x) {
  reversed <- vapply(
    strsplit(x, ""),
    FUN = function(z) paste(rev(z), collapse = ""),
    FUN.VALUE = ""
  )
  ## paste() turns NA into the literal string "NA"; restore missingness
  reversed[is.na(x)] <- NA_character_
  reversed
}

#+ include = TRUE
## Return the "bizarro" version of `x`, dispatching with switch() on the
## first element of class(x).
##
## x: an object whose first class is "logical", "integer", "numeric" or
##    "character".
## Returns `!x` (logical), `-x` (integer or numeric), or each string reversed
## via str_reverse() (character).
## Errors (without the call in the message) for any other first class.
bizarro <- function(x) {
  cls <- class(x)[[1]] ## not a great idea, in general
  switch(
    cls,
    logical = !x,
    integer = , ## empty arm falls through to the next one
    numeric = -x,
    character = str_reverse(x),
    ## Unnamed last arm is the default; message uses the correct spelling
    stop("Don't know how to make bizarro <", cls, ">", call. = FALSE)
  )
}

bizarro(1:5)

bizarro(c(TRUE, FALSE, FALSE, TRUE, FALSE))

bizarro(c("abc", "def"))

bizarro(iris)


================================================
FILE: R/13_stringr_switch.R
================================================
## https://github.com/tidyverse/stringr/blob/e775974d2822ae0de90ab33cf9f02273c22a3801/R/pad.r#L30

## Pad strings, choosing the padding function from `side`.
##
## string, width, pad: forwarded unchanged to the selected stri_pad_*()
##                     function.
## side: one of "left" (the default), "right" or "both". match.arg() rejects
##       any other value.
str_pad <- function(string, width, side = c("left", "right", "both"),
                    pad = " ") {
  side <- match.arg(side)

  ## match.arg() guarantees one of the three arms is selected
  pad_fun <- switch(
    side,
    left  = stri_pad_left,
    right = stri_pad_right,
    both  = stri_pad_both
  )
  pad_fun(string, width, pad = pad)
}


================================================
FILE: R/14_age_case-when.R
================================================
library(tidyverse)

## Version 1: case_when() lists the age bands as a flat set of
## `condition ~ label` pairs. Conditions are tried top to bottom and the
## first match wins, so each band only needs its upper bound; `TRUE` is the
## catch-all for everything that is left.
tibble(
  age_yrs = c(0, 4, 10, 15, 24, 55),
  age_cat = case_when(
    age_yrs < 2  ~ "baby",
    age_yrs < 13 ~ "kid",
    age_yrs < 20 ~ "teen",
    TRUE         ~ "adult"
  )
)

## Version 2: the same banding written as nested ifelse() calls, one level
## of nesting per band. Note that this input vector differs from the one
## used in the tibble above.
age_yrs <- c(0, 8, 15, 24, 55)

cat(
ifelse(age_yrs < 2, "baby",
       ifelse(age_yrs < 13, "kid",
              ifelse(age_yrs < 20, "teen",
                     "adult"
              )
       )
)
,sep="\n")




================================================
FILE: R/15_devtools_%||%.R
================================================
## https://github.com/r-lib/devtools/blob/b01edfbfa1fd0e3965a24188805c5e55f0d7376f/R/build-manual.R

#' Create package pdf manual
#'
#' @param pkg package path or name
#' @param path path in which to produce package manual; when `NULL` (the
#'   default), the parent directory of the package path is used
build_manual <- function(pkg = ".", path = NULL) {
  pkg <- as.package(pkg)
  ## Resolve the NULL default: keep a supplied `path`, else fall back
  path <- path %||% dirname(pkg$path)
  ...
}

## https://github.com/r-lib/devtools/blob/bd3bdf15b8f2e5e07d750de4360df28090a9f117/R/install-github.r#L73-L74
## Excerpt; the bare `...` lines appear to stand in for omitted code.
github_remote <- function(repo, username = NULL, ...) {
  meta <- parse_git_repo(repo)
  ...
  ## Chained fallbacks: the `username` argument, then the "github.user"
  ## option. Arguments are evaluated lazily, so stop() only runs, and the
  ## error is only raised, when both of those are NULL.
  meta$username <- username %||%
    getOption("github.user") %||%
    stop("Unknown username.")
  ...
}

## Long-hand form of `path <- path %||% dirname(pkg$path)`
if (is.null(path)) {
  path <- dirname(pkg$path)
}

## Null-default operator: returns `x` unless it is NULL, in which case `y`.
## `y` is only evaluated when it is needed, so it may be an expensive
## computation or a stop() call.
##
## Uses base is.null(), so the operator works without rlang attached.
`%||%` <- function(x, y) {
  if (is.null(x)) y else x
}

## "Before" sketch: `z` defaults to NULL and is filled in with an explicit
## is.null() check. The bare `...` lines appear to stand in for omitted code.
f <- function(x, y = TRUE, z = NULL) {
  ...
  if (is.null(z)) {
    z <- much_logic(other, stuff, ...)
  }
  ...
}

## "After" sketch: the same NULL default resolved in one line with `%||%`.
## much_logic() is only evaluated when `z` is NULL. The bare `...` lines
## appear to stand in for omitted code.
f <- function(x, y = TRUE, z = NULL) {
  ...
  z <- z %||% much_logic(other, stuff, ...)
  ...
}


================================================
FILE: README.md
================================================
# Code Smells and Feels

Talk initially prepared for [useR! 2018](https://user2018.r-project.org) in Brisbane. Also delivered elsewhere, such as at the [First Mexican Statistical Association School in Data Science](https://amestad.mx/escuela/1/).  
by Jenny Bryan  
[jennybryan.org](https://jennybryan.org)  
Twitter: [@jennyBryan](https://twitter.com/jennyBryan/)  
GitHub: [@jennybc](https://github.com/jennybc)  

> "Code smell" is an evocative term for that vague feeling of unease we get when reading certain bits of code. It's not necessarily wrong, but neither is it obviously correct. We may be reluctant to work on such code, because past experience suggests it's going to be fiddly and bug-prone. In contrast, there's another type of code that just feels good to read and work on. What's the difference? If we can be more precise about code smells and feels, we can be intentional about writing code that is easier and more pleasant to work on. I've been fortunate to spend the last couple years embedded in a group of developers working on the tidyverse and r-lib packages. Based on this experience, I'll talk about specific code smells and deodorizing strategies for R.

## Link to this repo

[rstd.io/code-smells](https://rstd.io/code-smells) is a shortlink to HERE

## Slides

<a href="https://speakerdeck.com/jennybc/code-smells-and-feels"><img src="2018-07_user-brisbane-400.jpeg"></a>

Slides [on SpeakerDeck](https://speakerdeck.com/jennybc/code-smells-and-feels)

Slides [as PDF file](2018-07_user-brisbane-bryan.pdf) here in this repo

## Video

Video is available on YouTube:  
<https://www.youtube.com/watch?v=7oyiPBjLAWY>

## Credits and resources

Annotated and hyperlink-y list of resources mentioned in the slides, in roughly the same order.

---

Do useRs have less formal training in CS/programming than others writing code?

2018 Stack Overflow Annual Developer Survey: <https://insights.stackoverflow.com/survey>

Adapted from original code by [Julia Silge](https://juliasilge.com), data scientist at Stack Overflow.

Code here in this repo: [stackoverflow-survey](stackoverflow-survey)

---

Talks about programming style, workflow, and policies

Some that inspired me:

Good Programming Practice, UseR! 2004 Keynote, Martin Mächler  
http://www.ci.tuwien.ac.at/Conferences/useR-2004/  

What I find important when R Programming and Recent Cool Features in R  
2018 eRum Keynote by Martin Mächler and R Core Team  
http://stat.ethz.ch/~maechler/U/R/eRum_2018_ProgR-ALTREP.html  

My own efforts in this genre:

[Zen And The aRt Of Workflow Maintenance](https://speakerdeck.com/jennybc/zen-and-the-art-of-workflow-maintenance), IASC/NZSA 2017, Jenny Bryan

[Workflow: You should have one](https://speakerdeck.com/jennybc/workflow-you-should-have-one), EARL London 2017, Jenny Bryan

---

Cakes that look like hedgehogs ... sort of?

  * Beautiful hedgehog cake: [BBC goodfood recipe](https://www.bbcgoodfood.com/recipes/hedgehog-cake)
  * Homely hedgehog cake: [Reddit thread](https://www.reddit.com/r/funny/comments/1am3x7/so_a_friend_of_my_girlfriend_made_a_cake_for_her/), <http://i.imgur.com/peilfAh.jpg>
  * Photos originally found at <https://www.boredpanda.com/funny-cake-fails-expectations-reality/>
  
---

[What Every Successful Person Knows, But Never Says](https://jamesclear.com/ira-glass-failure)  
James Clear blog post that discusses an Ira Glass interview. Indicative quote:

> All of us who do creative work, we get into it because we have good taste. But it's like there is this gap. For the first couple years that you're making stuff, what you're making isn't so good. It’s not that great. It’s trying to be good, it has ambition to be good, but it’s not that good.
>
> But your taste, the thing that got you into the game, is still killer. And your taste is good enough that you can tell that what you're making is kind of a disappointment to you.

---

The teams that bring you

  * tidyverse packages: [org members](https://github.com/orgs/tidyverse/people) and [outside collaborators](https://github.com/orgs/tidyverse/outside-collaborators)
  * r-lib packages: [org members](https://github.com/orgs/r-lib/people) and [outside collaborators](https://github.com/orgs/r-lib/outside-collaborators)
  
---

Refactoring  
Improving the Design of Existing Code  
by Martin Fowler  
(with Kent Beck, John Brant, William Opdyke, and Don Roberts)  
https://martinfowler.com/books/refactoring.html

Dumpster photo by NeONBRAND  
https://unsplash.com/photos/8Yk4T-tDSYY

---

Code Smells – a Short List  
blog post by Arne Mertz  
https://arne-mertz.de/2017/08/code-smells-short-list/

---

bizarro: all code snippets are given here in [R/](R)

Beach + glass orb photo by Perchek Industrie  
https://unsplash.com/photos/y-rmmZZfD1I

---

Good enough practices in scientific computing  
Wilson G, Bryan J, Cranston K, Kitzes J, Nederbragt L, et al. (2017) Good enough practices in scientific computing. PLOS Computational Biology 13(6): e1005510. <https://doi.org/10.1371/journal.pcbi.1005510>

> Do not comment and uncomment sections of code to control a program's behavior.

---

`if() else()` described in breathless AI style:  
you mean a one layer neural network with identity activation and no hidden layers

[Tweet](https://twitter.com/F_Vaggi/status/1011127587639197696) by [Federico Vaggi](https://twitter.com/F_Vaggi)

---

Return early and clearly  
Blog post by Arne Mertz  
https://arne-mertz.de/2016/12/early-return/  
"Handling preconditions" section is the basis of my early return before/after example `get_some_data()`.

More posts and conversations about early returns and avoiding `if` entirely:

  * [Avoid Else, Return Early](http://blog.timoxley.com/post/47041269194/avoid-else-return-early), blog post by Tim Oxley
  * [Anti-If: The missing patterns](https://code.joejag.com/2016/anti-if-the-missing-patterns.html), blog post by Joe Wright
  * Recent discussion on Hacker News: <https://news.ycombinator.com/item?id=17408836>

Yoda photo by Kory Westerhold on flickr  
https://www.flickr.com/photos/korymatthew/14211839966

---

Baby with diaper photo by rawpixel  
https://unsplash.com/photos/6RjllGKO88U

---

In addition to the **Refactoring** book referenced above, these are other good reads for improving your code:

The Art of Readable Code  
Simple and Practical Techniques for Writing Better Code  
Dustin Boswell, Trevor Foucher  
http://shop.oreilly.com/product/9780596802301.do

The Pragmatic Programmer  
From Journeyman to Master  
by Andrew Hunt and David Thomas  
https://pragprog.com/book/tpp/the-pragmatic-programmer

---

Upgrade your cargo cult for the win  
https://meaningness.com/metablog/upgrade-your-cargo-cult

Toddler on run bike photo by Jordan Sanchez  
https://unsplash.com/photos/Vbzx-yy5FoA


================================================
FILE: code-smells-and-feels.Rproj
================================================
Version: 1.0

RestoreWorkspace: No
SaveWorkspace: No
AlwaysSaveHistory: Default

EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8

RnwWeave: Sweave
LaTeX: pdfLaTeX

AutoAppendNewline: Yes
StripTrailingWhitespace: Yes

BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace


================================================
FILE: resources/code-smells.txt
================================================
Duplicated Code
Long Method
Large Class
Long Parameter List
Divergent Change
Shotgun Surgery
Feature Envy
Data Clumps
Primitive Obsession
Switch Statements
Parallel Inheritance Hierarchies
Lazy Class
Speculative Generality
Temporary Field
Message Chains
Middle Man
Inappropriate Intimacy
Alternative Classes with Different Interfaces
Incomplete Library Class
Data Class
Refused Bequest
Comments

================================================
FILE: stackoverflow-survey/README.Rmd
================================================
---
output: github_document
---

<!-- README.md is generated from README.Rmd. Please edit that file -->

```{r setup, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

# Stack Overflow survey analysis

Look at evidence for formal training in CS/programming among those who use R.

Use 2018 Stack Overflow Annual Developer Survey: <https://insights.stackoverflow.com/survey>

Original code from [Julia Silge](https://juliasilge.com), data scientist at Stack Overflow. Modified by Jenny Bryan.

## Analysis

Load packages.

```{r}
library(here)
library(tidyverse)
library(scales)
```

Make sure we have the data.

```{r download-data}
## Expected location of the survey CSV once the archive has been unpacked
survey_path <- here(
  "stackoverflow-survey/developer_survey_2018/survey_results_public.csv"
)
## Only download when the CSV is not already present
if (!file.exists(survey_path)) {
  ## consults Content-Description to get filename
  ## NOTE(review): `:::` reaches a non-exported usethis function, which may
  ## not exist under this name in other usethis versions -- confirm.
  dl <- usethis:::download_zip(
    url = "https://drive.google.com/uc?export=download&id=1_9On2-nsBQIw3JiY43sWbrF8EjrqrR4U",
    destdir = here("stackoverflow-survey")
  )
  ## Unzip into a folder named after the archive, minus its extension
  target <- here(
    "stackoverflow-survey",
    tools::file_path_sans_ext(basename(dl))
  )
  utils::unzip(dl, exdir = target)
  ## Git-ignore the extracted folder, then delete the downloaded archive
  usethis::use_git_ignore(basename(target))
  unlink(dl)
}
```

Load the data.

```{r survey2018}
theme_set(theme_minimal(base_family="Source Sans Pro"))
survey2018 <- read_csv(survey_path)
```

What kinds of majors do R users have?

```{r wrangle}
## One row per respondent with a declared major: does "R" appear among the
## languages they listed, and what is their major?
users_majors <- survey2018 %>%
  select(Respondent, LanguageWorkedWith, UndergradMajor) %>%
  filter(!is.na(UndergradMajor)) %>%
  ## LanguageWorkedWith is a ";"-separated string; split it and expand to
  ## one row per respondent-language pair
  mutate(LanguageWorkedWith = str_split(LanguageWorkedWith, pattern = ";")) %>%
  unnest(LanguageWorkedWith) %>%
  group_by(Respondent) %>%
  summarize(UsesR = "R" %in% LanguageWorkedWith,
            UndergradMajor = first(UndergradMajor))

## Respondent counts per major, one column for each of "Other" and "useR";
## combinations that never occur are filled with 0
counts_major <- users_majors %>%
  count(UsesR, UndergradMajor) %>%
  mutate(UsesR = if_else(UsesR, "useR", "Other")) %>%
  spread(UsesR, n, fill = 0)

## Turn each count column into within-column proportions, adding 1 to every
## count first, then compare the two groups on a log2 scale
logratio_major <- counts_major %>%
  mutate_if(is.numeric, funs((. + 1) / sum(. + 1))) %>%
  mutate(logratio = log2(useR / Other)) %>%
  arrange(desc(logratio)) %>%
  mutate(
    ## Order the majors by logratio so the bars are sorted in the plot
    UndergradMajor = reorder(UndergradMajor, logratio),
    ## Positive logratio: the major's share is larger among R users
    Direction = factor(if_else(logratio > 0, "useRs", "Other")),
    Direction = forcats::fct_reorder(Direction, logratio, .desc = TRUE)
  )
```

```{r reveal-data}
knitr::kable(counts_major)
sum(counts_major$Other)
sum(counts_major$useR)
knitr::kable(logratio_major)
```

```{r major-barchart, fig.height = 6, fig.width = 11, dpi = 300}
## Base plot shared by both variants below: one horizontal bar per major,
## with bar length given by logratio and fill colour by Direction.
## NOTE(review): the group_by() does not appear to affect the plot --
## confirm whether it can be dropped.
p <- logratio_major %>%
  group_by(Direction) %>%
  ggplot(aes(UndergradMajor, logratio, fill = Direction)) +
  geom_col(alpha = 0.9) +
  coord_flip() +
  ## logratio is on a log2 scale, so the breaks -2..2 are labelled with the
  ## corresponding ratios 2^-2 .. 2^2
  scale_y_continuous(breaks = seq(-2, 2),
                     labels = c("0.25x", "0.5x", "Same", "2x", "4x"))

## Julia's original
p +
  labs(y = "Relatively more from R users", x = NULL,
       fill = "More likely from...",
       subtitle = "R users are less likely to have formal programming training",
       title = "What kinds of undergrad majors do R users have?")

## For use in Keynote
p +
  labs(y = "Relative prevalence", x = NULL,
       fill = "Major is more common among",
       caption = "Julia Silge & Jenny Bryan\nSource: 2018 Stack Overflow Annual Developer Survey") +
  theme(
    legend.position = "top",
    legend.title = element_text(size = rel(1.4)),
    axis.text.y = element_text(size = rel(1.3))
  )
```



================================================
FILE: stackoverflow-survey/README.md
================================================

<!-- README.md is generated from README.Rmd. Please edit that file -->

# Stack Overflow survey analysis

Look at evidence for formal training in CS/programming among those who
use R.

Use 2018 Stack Overflow Annual Developer Survey:
<https://insights.stackoverflow.com/survey>

Original code from [Julia Silge](https://juliasilge.com), data scientist
at Stack Overflow. Modified by Jenny Bryan.

## Analysis

Load packages.

``` r
library(here)
#> here() starts at /Users/jenny/talks/2018-07_user-brisbane
library(tidyverse)
#> ── Attaching packages ──────────────────────────────────────── tidyverse 1.2.1 ──
#> ✔ ggplot2 2.2.1     ✔ purrr   0.2.5
#> ✔ tibble  1.4.2     ✔ dplyr   0.7.6
#> ✔ tidyr   0.8.1     ✔ stringr 1.3.1
#> ✔ readr   1.1.1     ✔ forcats 0.3.0
#> ── Conflicts ─────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
library(scales)
#> 
#> Attaching package: 'scales'
#> The following object is masked from 'package:purrr':
#> 
#>     discard
#> The following object is masked from 'package:readr':
#> 
#>     col_factor
```

Make sure we have the data.

``` r
survey_path <- here(
  "stackoverflow-survey/developer_survey_2018/survey_results_public.csv"
)
if (!file.exists(survey_path)) {
  ## consults Content-Description to get filename
  dl <- usethis:::download_zip(
    url = "https://drive.google.com/uc?export=download&id=1_9On2-nsBQIw3JiY43sWbrF8EjrqrR4U",
    destdir = here("stackoverflow-survey")
  )
  target <- here(
    "stackoverflow-survey",
    tools::file_path_sans_ext(basename(dl))
  )
  utils::unzip(dl, exdir = target)
  usethis::use_git_ignore(basename(target))
  unlink(dl)
}
```

Load the data.

``` r
theme_set(theme_minimal(base_family="Source Sans Pro"))
survey2018 <- read_csv(survey_path)
#> Parsed with column specification:
#> cols(
#>   .default = col_character(),
#>   Respondent = col_integer(),
#>   AssessJob1 = col_integer(),
#>   AssessJob2 = col_integer(),
#>   AssessJob3 = col_integer(),
#>   AssessJob4 = col_integer(),
#>   AssessJob5 = col_integer(),
#>   AssessJob6 = col_integer(),
#>   AssessJob7 = col_integer(),
#>   AssessJob8 = col_integer(),
#>   AssessJob9 = col_integer(),
#>   AssessJob10 = col_integer(),
#>   AssessBenefits1 = col_integer(),
#>   AssessBenefits2 = col_integer(),
#>   AssessBenefits3 = col_integer(),
#>   AssessBenefits4 = col_integer(),
#>   AssessBenefits5 = col_integer(),
#>   AssessBenefits6 = col_integer(),
#>   AssessBenefits7 = col_integer(),
#>   AssessBenefits8 = col_integer(),
#>   AssessBenefits9 = col_integer()
#>   # ... with 23 more columns
#> )
#> See spec(...) for full column specifications.
```

What kinds of majors do R users have?

``` r
users_majors <- survey2018 %>%
  select(Respondent, LanguageWorkedWith, UndergradMajor) %>%
  filter(!is.na(UndergradMajor)) %>%
  mutate(LanguageWorkedWith = str_split(LanguageWorkedWith, pattern = ";")) %>%
  unnest(LanguageWorkedWith) %>%
  group_by(Respondent) %>%
  summarize(UsesR = "R" %in% LanguageWorkedWith,
            UndergradMajor = first(UndergradMajor))

counts_major <- users_majors %>%
  count(UsesR, UndergradMajor) %>%
  mutate(UsesR = if_else(UsesR, "useR", "Other")) %>%
  spread(UsesR, n, fill = 0)

logratio_major <- counts_major %>%
  mutate_if(is.numeric, funs((. + 1) / sum(. + 1))) %>%
  mutate(logratio = log2(useR / Other)) %>%
  arrange(desc(logratio)) %>%
  mutate(
    UndergradMajor = reorder(UndergradMajor, logratio),
    Direction = factor(if_else(logratio > 0, "useRs", "Other")),
    Direction = forcats::fct_reorder(Direction, logratio, .desc = TRUE)
  )
```

``` r
knitr::kable(counts_major)
```

| UndergradMajor                                                        | Other | useR |
| :-------------------------------------------------------------------- | ----: | ---: |
| A business discipline (ex. accounting, finance, marketing)            |  1750 |  171 |
| A health science (ex. nursing, pharmacy, radiology)                   |   217 |   29 |
| A humanities discipline (ex. literature, history, philosophy)         |  1487 |  103 |
| A natural science (ex. biology, chemistry, physics)                   |  2561 |  489 |
| A social science (ex. anthropology, psychology, political science)    |  1122 |  255 |
| Another engineering discipline (ex. civil, electrical, mechanical)    |  6575 |  370 |
| Computer science, computer engineering, or software engineering       | 48340 | 1996 |
| Fine arts or performing arts (ex. graphic design, music, studio art)  |  1105 |   30 |
| I never declared a major                                              |   677 |   16 |
| Information systems, information technology, or system administration |  6307 |  200 |
| Mathematics or statistics                                             |  2236 |  582 |
| Web development or web design                                         |  2397 |   21 |

``` r
sum(counts_major$Other)
#> [1] 74774
sum(counts_major$useR)
#> [1] 4262
knitr::kable(logratio_major)
```

| UndergradMajor                                                        |     Other |      useR |    logratio | Direction |
| :-------------------------------------------------------------------- | --------: | --------: | ----------: | :-------- |
| Mathematics or statistics                                             | 0.0299120 | 0.1364062 |   2.1891119 | useRs     |
| A social science (ex. anthropology, psychology, political science)    | 0.0150162 | 0.0598971 |   1.9959672 | useRs     |
| A natural science (ex. biology, chemistry, physics)                   | 0.0342577 | 0.1146467 |   1.7426926 | useRs     |
| A health science (ex. nursing, pharmacy, radiology)                   | 0.0029150 | 0.0070192 |   1.2678157 | useRs     |
| A business discipline (ex. accounting, finance, marketing)            | 0.0234135 | 0.0402433 |   0.7814108 | useRs     |
| A humanities discipline (ex. literature, history, philosophy)         | 0.0198968 | 0.0243332 |   0.2903903 | useRs     |
| Another engineering discipline (ex. civil, electrical, mechanical)    | 0.0879309 | 0.0868039 | \-0.0186098 | Other     |
| Computer science, computer engineering, or software engineering       | 0.6463910 | 0.4672438 | \-0.4682317 | Other     |
| Information systems, information technology, or system administration | 0.0843473 | 0.0470285 | \-0.8428058 | Other     |
| Fine arts or performing arts (ex. graphic design, music, studio art)  | 0.0147889 | 0.0072532 | \-1.0278300 | Other     |
| I never declared a major                                              | 0.0090659 | 0.0039775 | \-1.1885692 | Other     |
| Web development or web design                                         | 0.0320648 | 0.0051474 | \-2.6390749 | Other     |

``` r
p <- logratio_major %>% 
  group_by(Direction) %>% 
  ggplot(aes(UndergradMajor, logratio, fill = Direction)) +
  geom_col(alpha = 0.9) +
  coord_flip() +
  scale_y_continuous(breaks = seq(-2, 2),
                     labels = c("0.25x", "0.5x", "Same", "2x", "4x"))

## Julia's original
p +
  labs(y = "Relatively more from R users", x = NULL,
       fill = "More likely from...",
       subtitle = "R users are less likely to have formal programming training",
       title = "What kinds of undergrad majors do R users have?")       
```

![](README_files/figure-gfm/major-barchart-1.png)<!-- -->

``` r

## For use in Keynote
p +
  labs(y = "Relative prevalence", x = NULL,
       fill = "Major is more common among",
       caption = "Julia Silge & Jenny Bryan\nSource: 2018 Stack Overflow Annual Developer Survey") +
  theme(
    legend.position = "top",
    legend.title = element_text(size = rel(1.4)),
    axis.text.y = element_text(size = rel(1.3))
  )
```

![](README_files/figure-gfm/major-barchart-2.png)<!-- -->
Download .txt
gitextract_l_5jxohu/

├── .gitignore
├── R/
│   ├── 00_biz_comment-decomment.R
│   ├── 01_biz_if-else.R
│   ├── 02_biz_function.R
│   ├── 03_biz_complicated-conditions.R
│   ├── 04_biz_simplify-conditions.R
│   ├── 05_googledrive_is-parental.R
│   ├── 06_biz_stopifnot.R
│   ├── 07_get-some-data_before.R
│   ├── 08_get-some-data_after.R
│   ├── 09_googledrive_process-response.R
│   ├── 10_biz_if-else-apalooza.R
│   ├── 11_biz_S3.R
│   ├── 12_biz_switch.R
│   ├── 13_stringr_switch.R
│   ├── 14_age_case-when.R
│   └── 15_devtools_%||%.R
├── README.md
├── code-smells-and-feels.Rproj
├── resources/
│   └── code-smells.txt
└── stackoverflow-survey/
    ├── README.Rmd
    └── README.md
Condensed preview — 22 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (29K chars).
[
  {
    "path": ".gitignore",
    "chars": 114,
    "preview": ".Rhistory\n.RData\n.Rproj.user\ndeveloper_survey_2018\nRefactoring_improving_the_design_of_existing_code.pdf\nkeynote*\n"
  },
  {
    "path": "R/00_biz_comment-decomment.R",
    "chars": 111,
    "preview": "x <- 1:5\n#x <- c(TRUE, FALSE, FALSE, TRUE, FALSE)\n\ncat(\n  \"The bizarro version of x is\",\n  -x,\n  #!x,\n  \"\\n\"\n)\n"
  },
  {
    "path": "R/01_biz_if-else.R",
    "chars": 151,
    "preview": "x <- 1:5\n#x <- c(TRUE, FALSE, FALSE, TRUE, FALSE)\n\ncat(\n  \"The bizarro version of x is\",\n  if (is.numeric(x)) {\n    -x\n "
  },
  {
    "path": "R/02_biz_function.R",
    "chars": 138,
    "preview": "bizarro <- function(x) {\n  if (is.numeric(x)) {\n    -x\n  } else {\n    !x\n  }\n}\n\nbizarro(1:5)\n\nbizarro(c(TRUE, FALSE, FAL"
  },
  {
    "path": "R/03_biz_complicated-conditions.R",
    "chars": 343,
    "preview": "bizarro <- function(x) {\n  if (class(x)[[1]] == \"numeric\" || class(x)[[1]] == \"integer\") {\n    -x\n  } else if (class(x)["
  },
  {
    "path": "R/04_biz_simplify-conditions.R",
    "chars": 287,
    "preview": "bizarro <- function(x) {\n  if (is.numeric(x)) {\n    -x\n  } else if (is.logical(x)) {\n    !x\n  } else {\n    stop(\n      \""
  },
  {
    "path": "R/05_googledrive_is-parental.R",
    "chars": 695,
    "preview": "## https://github.com/tidyverse/googledrive/blob/b6e97999d0781c31c69ef6fa93d1091a04b44d20/R/drive_cp.R#L67\n\ndrive_cp <- "
  },
  {
    "path": "R/06_biz_stopifnot.R",
    "chars": 211,
    "preview": "bizarro <- function(x) {\n  stopifnot(is.numeric(x) || is.logical(x))\n  \n  if (is.numeric(x)) {\n    -x\n  } else {\n    !x\n"
  },
  {
    "path": "R/07_get-some-data_before.R",
    "chars": 562,
    "preview": "get_some_data <- function(config, outfile) {\n  if (config_ok(config)) {\n    if (can_write(outfile)) {\n      if (can_open"
  },
  {
    "path": "R/08_get-some-data_after.R",
    "chars": 410,
    "preview": "get_some_data <- function(config, outfile) {\n  if (config_bad(config)) {\n    stop(\"Bad config\")\n  }\n  \n  if (!can_write("
  },
  {
    "path": "R/09_googledrive_process-response.R",
    "chars": 465,
    "preview": "## https://github.com/tidyverse/googledrive/blob/94c8c01cbb9fd96fe59920cf58e39263a3135337/R/process_response.R#L8\n\nproce"
  },
  {
    "path": "R/10_biz_if-else-apalooza.R",
    "chars": 650,
    "preview": "str_reverse <- function(x) {\n  vapply(\n    strsplit(x, \"\"),\n    FUN = function(z) paste(rev(z), collapse = \"\"),\n    FUN."
  },
  {
    "path": "R/11_biz_S3.R",
    "chars": 1038,
    "preview": "## snippets on slides created with lots of selective reprex()ing and toggling of\n## the chunk options\n\n#+ include = FALS"
  },
  {
    "path": "R/12_biz_switch.R",
    "chars": 531,
    "preview": "#+ include = FALSE\nstr_reverse <- function(x) {\n  vapply(\n    strsplit(x, \"\"),\n    FUN = function(z) paste(rev(z), colla"
  },
  {
    "path": "R/13_stringr_switch.R",
    "chars": 456,
    "preview": "## https://github.com/tidyverse/stringr/blob/e775974d2822ae0de90ab33cf9f02273c22a3801/R/pad.r#L30\n\nstr_pad <- function(s"
  },
  {
    "path": "R/14_age_case-when.R",
    "chars": 414,
    "preview": "library(tidyverse)\n\ntibble(\n  age_yrs = c(0, 4, 10, 15, 24, 55),\n  age_cat = case_when(\n    age_yrs < 2  ~ \"baby\",\n    a"
  },
  {
    "path": "R/15_devtools_%||%.R",
    "chars": 978,
    "preview": "## https://github.com/r-lib/devtools/blob/b01edfbfa1fd0e3965a24188805c5e55f0d7376f/R/build-manual.R\n\n#' Create package p"
  },
  {
    "path": "README.md",
    "chars": 6781,
    "preview": "# Code Smells and Feels\n\nTalk initially prepared for [useR!2018](https://user2018.r-project.org) Brisbane. Also delivere"
  },
  {
    "path": "code-smells-and-feels.Rproj",
    "chars": 386,
    "preview": "Version: 1.0\n\nRestoreWorkspace: No\nSaveWorkspace: No\nAlwaysSaveHistory: Default\n\nEnableCodeIndexing: Yes\nUseSpacesForTab"
  },
  {
    "path": "resources/code-smells.txt",
    "chars": 394,
    "preview": "Duplicated Code\nLong Method\nLarge Class\nLong Parameter List\nDivergent Change\nShotgun Surgery\nFeature Envy\nData Clumps\nPr"
  },
  {
    "path": "stackoverflow-survey/README.Rmd",
    "chars": 3386,
    "preview": "---\noutput: github_document\n---\n\n<!-- README.md is generated from README.Rmd. Please edit that file -->\n\n```{r setup, in"
  },
  {
    "path": "stackoverflow-survey/README.md",
    "chars": 7824,
    "preview": "\n<!-- README.md is generated from README.Rmd. Please edit that file -->\n\n# Stack Overflow survey analysis\n\nLook at evide"
  }
]

About this extraction

This page contains the full source code of the jennybc/code-smells-and-feels GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 22 files (25.7 KB), approximately 8.3k tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!