From ff893361e8ebc291cc177ba62d3cb5ff247bb660 Mon Sep 17 00:00:00 2001 From: mine-cetinkaya-rundel Date: Mon, 12 Dec 2022 13:35:15 -0500 Subject: [PATCH] Label x and y arguments in aes(), closes #1159 --- EDA.qmd | 2 +- factors.qmd | 5 +++-- functions.qmd | 26 +++++++++++++------------- layers.qmd | 4 ++-- logicals.qmd | 2 +- numbers.qmd | 10 +++++----- quarto/diamond-sizes.qmd | 2 +- regexps.qmd | 2 +- workflow-pipes.qmd | 2 +- workflow-scripts.qmd | 2 +- workflow-style.qmd | 4 ++-- 11 files changed, 31 insertions(+), 30 deletions(-) diff --git a/EDA.qmd b/EDA.qmd index 34e250a..c86fff8 100644 --- a/EDA.qmd +++ b/EDA.qmd @@ -471,7 +471,7 @@ You can do that by exchanging the x and y aesthetic mappings. #| on the y-axis and ordered by increasing median highway mileage. ggplot(mpg, - aes(y = fct_reorder(class, hwy, median), x = hwy)) + + aes(x = hwy, y = fct_reorder(class, hwy, median))) + geom_boxplot() ``` diff --git a/factors.qmd b/factors.qmd index 1f3d56b..77299e5 100644 --- a/factors.qmd +++ b/factors.qmd @@ -256,7 +256,8 @@ It takes a factor, `f`, and then any number of levels that you want to move to t #| bottom of the y-axis. Generally there is a positive association #| between income and age, and the income band with the highest average #| age is "Not applicable". -ggplot(rincome_summary, aes(age, fct_relevel(rincome, "Not applicable"))) + + +ggplot(rincome_summary, aes(x = age, y = fct_relevel(rincome, "Not applicable"))) + geom_point() ``` @@ -291,7 +292,7 @@ by_age <- gss_cat |> prop = n / sum(n) ) -ggplot(by_age, aes(age, prop, color = marital)) + +ggplot(by_age, aes(x = age, y = prop, color = marital)) + geom_line(na.rm = TRUE) ggplot(by_age, aes(x = age, y = prop, color = fct_reorder2(marital, age, prop))) + diff --git a/functions.qmd b/functions.qmd index 7b44347..e72089b 100644 --- a/functions.qmd +++ b/functions.qmd @@ -666,11 +666,11 @@ For example, imagine that you're making a lot of histograms: ```{r} #| fig-show: hide diamonds |> - ggplot(aes(carat)) + + ggplot(aes(x = carat)) + geom_histogram(binwidth = 0.1) diamonds |> - ggplot(aes(carat)) + + ggplot(aes(x = carat)) + geom_histogram(binwidth = 0.05) ``` @@ -680,7 +680,7 @@ This is easy as pie once you know that `aes()` is a data-masking function and yo ```{r} histogram <- function(df, var, binwidth = NULL) { df |> - ggplot(aes({{ var }})) + + ggplot(aes(x = {{ var }})) + geom_histogram(binwidth = binwidth) } @@ -705,7 +705,7 @@ For example, maybe you want an easy way to eyeball whether or not a data set is # https://twitter.com/tyler_js_smith/status/1574377116988104704 linearity_check <- function(df, x, y) { df |> - ggplot(aes({{ x }}, {{ y }})) + + ggplot(aes(x = {{ x }}, y = {{ y }})) + geom_point() + geom_smooth(method = "loess", color = "red", se = FALSE) + geom_smooth(method = "lm", color = "blue", se = FALSE) @@ -722,7 +722,7 @@ Or maybe you want an alternative to colored scatterplots for very large datasets # https://twitter.com/ppaxisa/status/1574398423175921665 hex_plot <- function(df, x, y, z, bins = 20, fun = "mean") { df |> - ggplot(aes({{ x }}, {{ y }}, z = {{ z }})) + + ggplot(aes(x = {{ x }}, y = {{ y }}, z = {{ z }})) + stat_summary_hex( aes(color = after_scale(fill)), # make border same color as fill bins = bins, @@ -760,7 +760,7 @@ Or maybe you want to make it easy to draw a bar plot just for a subset of the da conditional_bars <- function(df, condition, var) { df |> filter({{ condition }}) |> - ggplot(aes({{ var }})) + + ggplot(aes(x = {{ var }})) + geom_bar() } @@ -779,7 +779,7 @@ fancy_ts <- function(df, val, group) { summarize(breaks = max({{ val }})) df |> - ggplot(aes(date, {{ val }}, group = {{ group }}, color = {{ group }})) + + ggplot(aes(x = date, y = {{ val }}, group = {{ group }}, color = {{ group }})) + geom_path() + scale_y_continuous( breaks = labs$breaks, @@ -813,7 +813,7 @@ The only advantage of this syntax is that `vars()` uses tidy evaluation so you c ```{r} # https://twitter.com/sharoz/status/1574376332821204999 foo <- function(x) { - ggplot(mtcars, aes(mpg, disp)) + + ggplot(mtcars, aes(x = mpg, y = disp)) + geom_point() + facet_wrap(vars({{ x }})) } @@ -828,7 +828,7 @@ For example, the following function makes it particularly easy to interactively # https://twitter.com/yutannihilat_en/status/1574387230025875457 density <- function(color, facets, binwidth = 0.1) { diamonds |> - ggplot(aes(carat, after_stat(density), color = {{ color }})) + + ggplot(aes(x = carat, y = after_stat(density), color = {{ color }})) + geom_freqpoly(binwidth = binwidth) + facet_wrap(vars({{ facets }})) } @@ -845,7 +845,7 @@ Remember the histogram function we showed you earlier? ```{r} histogram <- function(df, var, binwidth = NULL) { df |> - ggplot(aes({{ var }})) + + ggplot(aes(x = {{ var }})) + geom_histogram(binwidth = binwidth) } ``` @@ -863,7 +863,7 @@ histogram <- function(df, var, binwidth) { label <- rlang::englue("A histogram of {{var}} with binwidth {binwidth}") df |> - ggplot(aes({{ var }})) + + ggplot(aes(x = {{ var }})) + geom_histogram(binwidth = binwidth) + labs(title = label) } @@ -917,7 +917,7 @@ This makes it easier to see the hierarchy in your code by skimming the left-hand # missing extra two spaces density <- function(color, facets, binwidth = 0.1) { diamonds |> - ggplot(aes(carat, after_stat(density), color = {{ color }})) + + ggplot(aes(x = carat, y = after_stat(density), color = {{ color }})) + geom_freqpoly(binwidth = binwidth) + facet_wrap(vars({{ facets }})) } @@ -925,7 +925,7 @@ diamonds |> # Pipe indented incorrectly density <- function(color, facets, binwidth = 0.1) { diamonds |> - ggplot(aes(carat, after_stat(density), color = {{ color }})) + + ggplot(aes(x = carat, y = after_stat(density), color = {{ color }})) + geom_freqpoly(binwidth = binwidth) + facet_wrap(vars({{ facets }})) } diff --git a/layers.qmd b/layers.qmd index 19bd1b2..811ef58 100644 --- a/layers.qmd +++ b/layers.qmd @@ -949,10 +949,10 @@ There are two other coordinate systems that are occasionally helpful. nz <- map_data("nz") - ggplot(nz, aes(long, lat, group = group)) + + ggplot(nz, aes(x = long, y = lat, group = group)) + geom_polygon(fill = "white", color = "black") - ggplot(nz, aes(long, lat, group = group)) + + ggplot(nz, aes(x = long, y = lat, group = group)) + geom_polygon(fill = "white", color = "black") + coord_quickmap() ``` diff --git a/logicals.qmd b/logicals.qmd index d39abe0..b95760d 100644 --- a/logicals.qmd +++ b/logicals.qmd @@ -365,7 +365,7 @@ flights |> prop_delayed = mean(arr_delay > 0, na.rm = TRUE), .groups = "drop" ) |> - ggplot(aes(prop_delayed)) + + ggplot(aes(x = prop_delayed)) + geom_histogram(binwidth = 0.05) ``` diff --git a/numbers.qmd b/numbers.qmd index 4cb6d23..24e8b91 100644 --- a/numbers.qmd +++ b/numbers.qmd @@ -436,7 +436,7 @@ slide_vec(x, sum, .before = 2, .after = 2, .complete = TRUE) ```{r} flights |> filter(month == 1, day == 1) |> - ggplot(aes(sched_dep_time, dep_delay)) + + ggplot(aes(x = sched_dep_time, y = dep_delay)) + geom_point() ``` @@ -649,7 +649,7 @@ flights |> n = n(), .groups = "drop" ) |> - ggplot(aes(mean, median)) + + ggplot(aes(x = mean, y = median)) + geom_abline(slope = 1, intercept = 0, color = "white", size = 2) + geom_point() ``` @@ -731,12 +731,12 @@ This suggests that the mean is unlikely to be a good summary and we might prefer #| fig-height: 2 flights |> - ggplot(aes(dep_delay)) + + ggplot(aes(x = dep_delay)) + geom_histogram(binwidth = 15) flights |> filter(dep_delay < 120) |> - ggplot(aes(dep_delay)) + + ggplot(aes(x = dep_delay)) + geom_histogram(binwidth = 5) ``` @@ -756,7 +756,7 @@ The distributions seem to follow a common pattern, suggesting it's fine to use t #| overlapping forming a thick black bland. flights |> filter(dep_delay < 120) |> - ggplot(aes(dep_delay, group = interaction(day, month))) + + ggplot(aes(x = dep_delay, group = interaction(day, month))) + geom_freqpoly(binwidth = 5, alpha = 1/5) ``` diff --git a/quarto/diamond-sizes.qmd b/quarto/diamond-sizes.qmd index 55a592a..234fe97 100644 --- a/quarto/diamond-sizes.qmd +++ b/quarto/diamond-sizes.qmd @@ -23,6 +23,6 @@ The distribution of the remainder is shown below: #| echo: false smaller |> - ggplot(aes(carat)) + + ggplot(aes(x = carat)) + geom_freqpoly(binwidth = 0.01) ``` diff --git a/regexps.qmd b/regexps.qmd index cd96198..633ac7a 100644 --- a/regexps.qmd +++ b/regexps.qmd @@ -166,7 +166,7 @@ It looks like they've radically increased in popularity lately! babynames |> group_by(year) |> summarize(prop_x = mean(str_detect(name, "x"))) |> - ggplot(aes(year, prop_x)) + + ggplot(aes(x = year, y = prop_x)) + geom_line() ``` diff --git a/workflow-pipes.qmd b/workflow-pipes.qmd index a082eaf..641a63f 100644 --- a/workflow-pipes.qmd +++ b/workflow-pipes.qmd @@ -140,7 +140,7 @@ We wish this transition wasn't necessary but unfortunately ggplot2 was created b diamonds |> count(cut, clarity) |> - ggplot(aes(clarity, cut, fill = n)) + + ggplot(aes(x = clarity, y = cut, fill = n)) + geom_tile() ``` diff --git a/workflow-scripts.qmd b/workflow-scripts.qmd index cd88307..bd0ec18 100644 --- a/workflow-scripts.qmd +++ b/workflow-scripts.qmd @@ -298,7 +298,7 @@ Don't worry about the details, you'll learn them later in the book. library(tidyverse) -ggplot(diamonds, aes(carat, price)) + +ggplot(diamonds, aes(x = carat, y = price)) + geom_hex() ggsave("diamonds.pdf") diff --git a/workflow-style.qmd b/workflow-style.qmd index 610198e..d452431 100644 --- a/workflow-style.qmd +++ b/workflow-style.qmd @@ -212,7 +212,7 @@ flights |> summarize( delay = mean(arr_delay, na.rm = TRUE) ) |> - ggplot(aes(month, delay)) + + ggplot(aes(x = month, y = delay)) + geom_point() + geom_line() ``` @@ -228,7 +228,7 @@ flights |> distance = mean(distance), speed = mean(air_time / distance, na.rm = TRUE) ) |> - ggplot(aes(distance, speed)) + + ggplot(aes(x = distance, y = speed)) + geom_smooth( method = "loess", span = 0.5,