diff --git a/EDA.qmd b/EDA.qmd index 83380a2..4405dab 100644 --- a/EDA.qmd +++ b/EDA.qmd @@ -341,7 +341,7 @@ nycflights13::flights |> sched_min = sched_dep_time %% 100, sched_dep_time = sched_hour + (sched_min / 60) ) |> - ggplot(aes(sched_dep_time)) + + ggplot(aes(x = sched_dep_time)) + geom_freqpoly(aes(color = cancelled), binwidth = 1/4) ``` diff --git a/communication.qmd b/communication.qmd index 3b974eb..7d7efb7 100644 --- a/communication.qmd +++ b/communication.qmd @@ -583,7 +583,7 @@ For example, take this plot that shows when each US president started and ended presidential |> mutate(id = 33 + row_number()) |> - ggplot(aes(start, id)) + + ggplot(aes(x = start, y = id)) + geom_point() + geom_segment(aes(xend = end, yend = id)) + scale_x_date(name = NULL, breaks = presidential$start, date_labels = "'%y") @@ -759,7 +759,7 @@ For example, if we map presidential party to color, we want to use the standard presidential |> mutate(id = 33 + row_number()) |> - ggplot(aes(start, id, color = party)) + + ggplot(aes(x = start, y = id, color = party)) + geom_point() + geom_segment(aes(xend = end, yend = id)) + scale_color_manual(values = c(Republican = "red", Democratic = "blue")) @@ -852,10 +852,10 @@ For example, if we extract two classes of cars and plot them separately, it's di suv <- mpg |> filter(class == "suv") compact <- mpg |> filter(class == "compact") -ggplot(suv, aes(displ, hwy, color = drv)) + +ggplot(suv, aes(x = displ, y = hwy, color = drv)) + geom_point() -ggplot(compact, aes(displ, hwy, color = drv)) + +ggplot(compact, aes(x = displ, y = hwy, color = drv)) + geom_point() ``` diff --git a/data-tidy.qmd b/data-tidy.qmd index 9356d87..d918398 100644 --- a/data-tidy.qmd +++ b/data-tidy.qmd @@ -121,7 +121,7 @@ table1 |> count(year, wt = cases) # Visualise changes over time -ggplot(table1, aes(year, cases)) + +ggplot(table1, aes(x = year, y = cases)) + geom_line(aes(group = country), color = "grey50") + geom_point(aes(color = country, shape = country)) + 
scale_x_continuous(breaks = c(1999, 2000)) @@ -249,7 +249,7 @@ The code is shown below and the result is @fig-billboard-ranks. #| >50. billboard_tidy |> - ggplot(aes(week, rank, group = track)) + + ggplot(aes(x = week, y = rank, group = track)) + geom_line(alpha = 1/3) + scale_y_reverse() ``` @@ -722,7 +722,7 @@ Depending on what you want to do next, you may find any of the following three s cms_patient_care |> filter(type == "observed") |> - ggplot(aes(score)) + + ggplot(aes(x = score)) + geom_histogram(binwidth = 2) + facet_wrap(vars(measure_abbr)) ``` @@ -739,7 +739,7 @@ Depending on what you want to do next, you may find any of the following three s names_from = measure_abbr, values_from = score ) |> - ggplot(aes(dyspnea_screening, dyspena_treatment)) + + ggplot(aes(x = dyspnea_screening, y = dyspena_treatment)) + geom_point() + coord_equal() ``` diff --git a/data-transform.qmd b/data-transform.qmd index f1a202f..e287c95 100644 --- a/data-transform.qmd +++ b/data-transform.qmd @@ -390,9 +390,9 @@ flights <- flights |> mutate( dep_sched = dep_time + dep_delay ) -ggplot(flights, aes(dep_sched)) + geom_histogram(binwidth = 60) -ggplot(flights, aes(dep_sched %% 60)) + geom_histogram(binwidth = 1) -ggplot(flights, aes(air_time - airtime2)) + geom_histogram() +ggplot(flights, aes(x = dep_sched)) + geom_histogram(binwidth = 60) +ggplot(flights, aes(x = dep_sched %% 60)) + geom_histogram(binwidth = 1) +ggplot(flights, aes(x = air_time - airtime2)) + geom_histogram() ``` 1. Compare `air_time` with `arr_time - dep_time`. @@ -601,7 +601,7 @@ delays <- flights |> n = n() ) -ggplot(delays, aes(delay)) + +ggplot(delays, aes(x = delay)) + geom_freqpoly(binwidth = 10) ``` @@ -615,7 +615,7 @@ That seems pretty surprising, so lets draw a scatterplot of number of flights vs #| (from -50 to ~300), but the variability rapidly decreases as the #| number of flights increases. 
-ggplot(delays, aes(n, delay)) + +ggplot(delays, aes(x = n, y = delay)) + geom_point(alpha = 1/10) ``` @@ -638,7 +638,7 @@ When looking at this sort of plot, it's often useful to filter out the groups wi delays |> filter(n > 25) |> - ggplot(aes(n, delay)) + + ggplot(aes(x = n, y = delay)) + geom_point(alpha = 1/10) + geom_smooth(se = FALSE) ``` @@ -676,7 +676,7 @@ When we plot the skill of the batter (measured by the batting average, `ba`) aga batters |> filter(n > 100) |> - ggplot(aes(n, perf)) + + ggplot(aes(x = n, y = perf)) + geom_point(alpha = 1 / 10) + geom_smooth(se = FALSE) ``` diff --git a/datetimes.qmd b/datetimes.qmd index 3428636..df4c463 100644 --- a/datetimes.qmd +++ b/datetimes.qmd @@ -228,7 +228,7 @@ With this data, we can visualize the distribution of departure times across the #| few flights in early Februrary, early July, late November, and late #| December. flights_dt |> - ggplot(aes(dep_time)) + + ggplot(aes(x = dep_time)) + geom_freqpoly(binwidth = 86400) # 86400 seconds = 1 day ``` @@ -243,7 +243,7 @@ Or within a single day: #| before 6am and after 8pm. flights_dt |> filter(dep_time < ymd(20130102)) |> - ggplot(aes(dep_time)) + + ggplot(aes(x = dep_time)) + geom_freqpoly(binwidth = 600) # 600 s = 10 minutes ``` @@ -355,7 +355,7 @@ flights_dt |> avg_delay = mean(dep_delay, na.rm = TRUE), n = n() ) |> - ggplot(aes(minute, avg_delay)) + + ggplot(aes(x = minute, y = avg_delay)) + geom_line() ``` @@ -375,7 +375,7 @@ sched_dep <- flights_dt |> n = n() ) -ggplot(sched_dep, aes(minute, avg_delay)) + +ggplot(sched_dep, aes(x = minute, y = avg_delay)) + geom_line() ``` @@ -396,7 +396,7 @@ Always be alert for this sort of pattern whenever you work with data that involv #| all most all flights are scheduled to depart on multiples of five, #| with a few extra at 15, 45, and 55 minutes. 
#| echo: false -ggplot(sched_dep, aes(minute, n)) + +ggplot(sched_dep, aes(x = minute, y = n)) + geom_line() ``` @@ -415,7 +415,7 @@ This, for example, allows us to plot the number of flights per week: #| weeks of the year (approximately 2,500 flights). flights_dt |> count(week = floor_date(dep_time, "week")) |> - ggplot(aes(week, n)) + + ggplot(aes(x = week, y = n)) + geom_line() + geom_point() ``` @@ -428,7 +428,7 @@ You can use rounding to show the distribution of flights across the course of a #| since midnight so it's hard to interpret. flights_dt |> mutate(dep_hour = dep_time - floor_date(dep_time, "day")) |> - ggplot(aes(dep_hour)) + + ggplot(aes(x = dep_hour)) + geom_freqpoly(binwidth = 60 * 30) ``` @@ -445,7 +445,7 @@ We can convert that to an `hms` object to get a more useful x-axis: #| around 12,000 per hour until 8pm, when they rapidly drop again. flights_dt |> mutate(dep_hour = hms::as_hms(dep_time - floor_date(dep_time, "day"))) |> - ggplot(aes(dep_hour)) + + ggplot(aes(x = dep_hour)) + geom_freqpoly(binwidth = 60 * 30) ``` diff --git a/factors.qmd b/factors.qmd index 67b42a7..ff64b36 100644 --- a/factors.qmd +++ b/factors.qmd @@ -147,7 +147,7 @@ Or with a bar chart: #| A bar chart showing the distribution of race. There are ~2000 #| records with race "Other", 3000 with race "Black", and other #| 15,000 with race "White". -ggplot(gss_cat, aes(race)) + +ggplot(gss_cat, aes(x = race)) + geom_bar() ``` @@ -185,7 +185,7 @@ relig_summary <- gss_cat |> n = n() ) -ggplot(relig_summary, aes(tvhours, relig)) + +ggplot(relig_summary, aes(x = tvhours, y = relig)) + geom_point() ``` @@ -202,7 +202,7 @@ We can improve it by reordering the levels of `relig` using `fct_reorder()`. #| The same scatterplot as above, but now the religion is displayed in #| increasing order of tvhours. "Other eastern" has the fewest tvhours #| under 2, and "Don't know" has the highest (over 5). 
-ggplot(relig_summary, aes(tvhours, fct_reorder(relig, tvhours))) + +ggplot(relig_summary, aes(x = tvhours, y = fct_reorder(relig, tvhours))) + geom_point() ``` @@ -218,7 +218,7 @@ relig_summary |> mutate( relig = fct_reorder(relig, tvhours) ) |> - ggplot(aes(tvhours, relig)) + + ggplot(aes(x = tvhours, y = relig)) + geom_point() ``` @@ -238,7 +238,7 @@ rincome_summary <- gss_cat |> n = n() ) -ggplot(rincome_summary, aes(age, fct_reorder(rincome, age))) + +ggplot(rincome_summary, aes(x = age, y = fct_reorder(rincome, age))) + geom_point() ``` @@ -294,7 +294,7 @@ by_age <- gss_cat |> -ggplot(by_age, aes(age, prop, color = marital)) + +ggplot(by_age, aes(x = age, y = prop, color = marital)) + geom_line(na.rm = TRUE) -ggplot(by_age, aes(age, prop, color = fct_reorder2(marital, age, prop))) + +ggplot(by_age, aes(x = age, y = prop, color = fct_reorder2(marital, age, prop))) + geom_line() + labs(color = "marital") ``` @@ -309,7 +309,7 @@ Combine it with `fct_rev()` if you want them in increasing frequency so that in #| (~3,000), never married (~5,000), married (~10,000). 
gss_cat |> mutate(marital = marital |> fct_infreq() |> fct_rev()) |> - ggplot(aes(marital)) + + ggplot(aes(x = marital)) + geom_bar() ``` diff --git a/iteration.qmd b/iteration.qmd index 6d8f001..380e4d5 100644 --- a/iteration.qmd +++ b/iteration.qmd @@ -972,7 +972,7 @@ Let's first make a function that draws the plot we want: ```{r} carat_histogram <- function(df) { - ggplot(df, aes(carat)) + geom_histogram(binwidth = 0.1) + ggplot(df, aes(x = carat)) + geom_histogram(binwidth = 0.1) } carat_histogram(by_clarity$data[[1]]) diff --git a/joins.qmd b/joins.qmd index daeb4ca..b77fc51 100644 --- a/joins.qmd +++ b/joins.qmd @@ -373,7 +373,7 @@ flights2 |> airports |> semi_join(flights, join_by(faa == dest)) |> - ggplot(aes(lon, lat)) + + ggplot(aes(x = lon, y = lat)) + borders("state") + geom_point() + coord_quickmap() @@ -394,7 +394,7 @@ flights2 |> summarize(delay = mean(arr_delay), n = n()) |> filter(n > 5) |> inner_join(airports, by = c("dest" = "faa")) |> - ggplot(aes(lon, lat)) + + ggplot(aes(x = lon, y = lat)) + borders("state") + geom_point(aes(size = n, color = delay)) + coord_quickmap() diff --git a/missing-values.qmd b/missing-values.qmd index 065f369..5358698 100644 --- a/missing-values.qmd +++ b/missing-values.qmd @@ -235,11 +235,11 @@ You can force them to display by supplying `drop = FALSE` to the appropriate dis #| #| The same bar chart as the last plot, but now with two values on #| the x-axis, "yes" and "no". There is no bar for the "yes" category. 
-ggplot(health, aes(smoker)) + +ggplot(health, aes(x = smoker)) + geom_bar() + scale_x_discrete() -ggplot(health, aes(smoker)) + +ggplot(health, aes(x = smoker)) + geom_bar() + scale_x_discrete(drop = FALSE) ``` diff --git a/quarto.qmd b/quarto.qmd index a1dc5f4..7328f52 100644 --- a/quarto.qmd +++ b/quarto.qmd @@ -412,7 +412,7 @@ To illustrate the principle, the following three plots have `fig-width` of 4, 6, ```{r} #| include: false -plot <- ggplot(mpg, aes(displ, hwy)) + geom_point() +plot <- ggplot(mpg, aes(x = displ, y = hwy)) + geom_point() ``` ```{r} diff --git a/quarto/fuel-economy.qmd b/quarto/fuel-economy.qmd index 8377d68..0d9d21a 100644 --- a/quarto/fuel-economy.qmd +++ b/quarto/fuel-economy.qmd @@ -18,7 +18,7 @@ class <- mpg |> filter(class == params$my_class) ```{r} #| message: false -ggplot(class, aes(displ, hwy)) + +ggplot(class, aes(x = displ, y = hwy)) + geom_point() + geom_smooth(se = FALSE) ```