Add x and y to aes(), addresses #1159
This commit is contained in:
parent
062ab1666d
commit
42191c94e5
2
EDA.qmd
2
EDA.qmd
|
@ -341,7 +341,7 @@ nycflights13::flights |>
|
||||||
sched_min = sched_dep_time %% 100,
|
sched_min = sched_dep_time %% 100,
|
||||||
sched_dep_time = sched_hour + (sched_min / 60)
|
sched_dep_time = sched_hour + (sched_min / 60)
|
||||||
) |>
|
) |>
|
||||||
ggplot(aes(sched_dep_time)) +
|
ggplot(aes(x = sched_dep_time)) +
|
||||||
geom_freqpoly(aes(color = cancelled), binwidth = 1/4)
|
geom_freqpoly(aes(color = cancelled), binwidth = 1/4)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -583,7 +583,7 @@ For example, take this plot that shows when each US president started and ended
|
||||||
|
|
||||||
presidential |>
|
presidential |>
|
||||||
mutate(id = 33 + row_number()) |>
|
mutate(id = 33 + row_number()) |>
|
||||||
ggplot(aes(start, id)) +
|
ggplot(aes(x = start, y = id)) +
|
||||||
geom_point() +
|
geom_point() +
|
||||||
geom_segment(aes(xend = end, yend = id)) +
|
geom_segment(aes(xend = end, yend = id)) +
|
||||||
scale_x_date(name = NULL, breaks = presidential$start, date_labels = "'%y")
|
scale_x_date(name = NULL, breaks = presidential$start, date_labels = "'%y")
|
||||||
|
@ -759,7 +759,7 @@ For example, if we map presidential party to color, we want to use the standard
|
||||||
|
|
||||||
presidential |>
|
presidential |>
|
||||||
mutate(id = 33 + row_number()) |>
|
mutate(id = 33 + row_number()) |>
|
||||||
ggplot(aes(start, id, color = party)) +
|
ggplot(aes(x = start, y = id, color = party)) +
|
||||||
geom_point() +
|
geom_point() +
|
||||||
geom_segment(aes(xend = end, yend = id)) +
|
geom_segment(aes(xend = end, yend = id)) +
|
||||||
scale_color_manual(values = c(Republican = "red", Democratic = "blue"))
|
scale_color_manual(values = c(Republican = "red", Democratic = "blue"))
|
||||||
|
@ -852,10 +852,10 @@ For example, if we extract two classes of cars and plot them separately, it's di
|
||||||
suv <- mpg |> filter(class == "suv")
|
suv <- mpg |> filter(class == "suv")
|
||||||
compact <- mpg |> filter(class == "compact")
|
compact <- mpg |> filter(class == "compact")
|
||||||
|
|
||||||
ggplot(suv, aes(displ, hwy, color = drv)) +
|
ggplot(suv, aes(x = displ, y = hwy, color = drv)) +
|
||||||
geom_point()
|
geom_point()
|
||||||
|
|
||||||
ggplot(compact, aes(displ, hwy, color = drv)) +
|
ggplot(compact, aes(x = displ, y = hwy, color = drv)) +
|
||||||
geom_point()
|
geom_point()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -121,7 +121,7 @@ table1 |>
|
||||||
count(year, wt = cases)
|
count(year, wt = cases)
|
||||||
|
|
||||||
# Visualise changes over time
|
# Visualise changes over time
|
||||||
ggplot(table1, aes(year, cases)) +
|
ggplot(table1, aes(x = year, y = cases)) +
|
||||||
geom_line(aes(group = country), color = "grey50") +
|
geom_line(aes(group = country), color = "grey50") +
|
||||||
geom_point(aes(color = country, shape = country)) +
|
geom_point(aes(color = country, shape = country)) +
|
||||||
scale_x_continuous(breaks = c(1999, 2000))
|
scale_x_continuous(breaks = c(1999, 2000))
|
||||||
|
@ -249,7 +249,7 @@ The code is shown below and the result is @fig-billboard-ranks.
|
||||||
#| >50.
|
#| >50.
|
||||||
|
|
||||||
billboard_tidy |>
|
billboard_tidy |>
|
||||||
ggplot(aes(week, rank, group = track)) +
|
ggplot(aes(x = week, y = rank, group = track)) +
|
||||||
geom_line(alpha = 1/3) +
|
geom_line(alpha = 1/3) +
|
||||||
scale_y_reverse()
|
scale_y_reverse()
|
||||||
```
|
```
|
||||||
|
@ -722,7 +722,7 @@ Depending on what you want to do next, you may find any of the following three s
|
||||||
|
|
||||||
cms_patient_care |>
|
cms_patient_care |>
|
||||||
filter(type == "observed") |>
|
filter(type == "observed") |>
|
||||||
ggplot(aes(score)) +
|
ggplot(aes(x = score)) +
|
||||||
geom_histogram(binwidth = 2) +
|
geom_histogram(binwidth = 2) +
|
||||||
facet_wrap(vars(measure_abbr))
|
facet_wrap(vars(measure_abbr))
|
||||||
```
|
```
|
||||||
|
@ -739,7 +739,7 @@ Depending on what you want to do next, you may find any of the following three s
|
||||||
names_from = measure_abbr,
|
names_from = measure_abbr,
|
||||||
values_from = score
|
values_from = score
|
||||||
) |>
|
) |>
|
||||||
ggplot(aes(dyspnea_screening, dyspena_treatment)) +
|
ggplot(aes(x = dyspnea_screening, y = dyspena_treatment)) +
|
||||||
geom_point() +
|
geom_point() +
|
||||||
coord_equal()
|
coord_equal()
|
||||||
```
|
```
|
||||||
|
|
|
@ -390,9 +390,9 @@ flights <- flights |> mutate(
|
||||||
dep_sched = dep_time + dep_delay
|
dep_sched = dep_time + dep_delay
|
||||||
)
|
)
|
||||||
|
|
||||||
ggplot(flights, aes(dep_sched)) + geom_histogram(binwidth = 60)
|
ggplot(flights, aes(x = dep_sched)) + geom_histogram(binwidth = 60)
|
||||||
ggplot(flights, aes(dep_sched %% 60)) + geom_histogram(binwidth = 1)
|
ggplot(flights, aes(x = dep_sched %% 60)) + geom_histogram(binwidth = 1)
|
||||||
ggplot(flights, aes(air_time - airtime2)) + geom_histogram()
|
ggplot(flights, aes(x = air_time - airtime2)) + geom_histogram()
|
||||||
```
|
```
|
||||||
|
|
||||||
1. Compare `air_time` with `arr_time - dep_time`.
|
1. Compare `air_time` with `arr_time - dep_time`.
|
||||||
|
@ -601,7 +601,7 @@ delays <- flights |>
|
||||||
n = n()
|
n = n()
|
||||||
)
|
)
|
||||||
|
|
||||||
ggplot(delays, aes(delay)) +
|
ggplot(delays, aes(x = delay)) +
|
||||||
geom_freqpoly(binwidth = 10)
|
geom_freqpoly(binwidth = 10)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -615,7 +615,7 @@ That seems pretty surprising, so let's draw a scatterplot of number of flights vs
|
||||||
#| (from -50 to ~300), but the variability rapidly decreases as the
|
#| (from -50 to ~300), but the variability rapidly decreases as the
|
||||||
#| number of flights increases.
|
#| number of flights increases.
|
||||||
|
|
||||||
ggplot(delays, aes(n, delay)) +
|
ggplot(delays, aes(x = n, y = delay)) +
|
||||||
geom_point(alpha = 1/10)
|
geom_point(alpha = 1/10)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -638,7 +638,7 @@ When looking at this sort of plot, it's often useful to filter out the groups wi
|
||||||
|
|
||||||
delays |>
|
delays |>
|
||||||
filter(n > 25) |>
|
filter(n > 25) |>
|
||||||
ggplot(aes(n, delay)) +
|
ggplot(aes(x = n, y = delay)) +
|
||||||
geom_point(alpha = 1/10) +
|
geom_point(alpha = 1/10) +
|
||||||
geom_smooth(se = FALSE)
|
geom_smooth(se = FALSE)
|
||||||
```
|
```
|
||||||
|
@ -676,7 +676,7 @@ When we plot the skill of the batter (measured by the batting average, `ba`) aga
|
||||||
|
|
||||||
batters |>
|
batters |>
|
||||||
filter(n > 100) |>
|
filter(n > 100) |>
|
||||||
ggplot(aes(n, perf)) +
|
ggplot(aes(x = n, y = perf)) +
|
||||||
geom_point(alpha = 1 / 10) +
|
geom_point(alpha = 1 / 10) +
|
||||||
geom_smooth(se = FALSE)
|
geom_smooth(se = FALSE)
|
||||||
```
|
```
|
||||||
|
|
|
@ -228,7 +228,7 @@ With this data, we can visualize the distribution of departure times across the
|
||||||
#| few flights in early February, early July, late November, and late
|
#| few flights in early February, early July, late November, and late
|
||||||
#| December.
|
#| December.
|
||||||
flights_dt |>
|
flights_dt |>
|
||||||
ggplot(aes(dep_time)) +
|
ggplot(aes(x = dep_time)) +
|
||||||
geom_freqpoly(binwidth = 86400) # 86400 seconds = 1 day
|
geom_freqpoly(binwidth = 86400) # 86400 seconds = 1 day
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -243,7 +243,7 @@ Or within a single day:
|
||||||
#| before 6am and after 8pm.
|
#| before 6am and after 8pm.
|
||||||
flights_dt |>
|
flights_dt |>
|
||||||
filter(dep_time < ymd(20130102)) |>
|
filter(dep_time < ymd(20130102)) |>
|
||||||
ggplot(aes(dep_time)) +
|
ggplot(aes(x = dep_time)) +
|
||||||
geom_freqpoly(binwidth = 600) # 600 s = 10 minutes
|
geom_freqpoly(binwidth = 600) # 600 s = 10 minutes
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -355,7 +355,7 @@ flights_dt |>
|
||||||
avg_delay = mean(dep_delay, na.rm = TRUE),
|
avg_delay = mean(dep_delay, na.rm = TRUE),
|
||||||
n = n()
|
n = n()
|
||||||
) |>
|
) |>
|
||||||
ggplot(aes(minute, avg_delay)) +
|
ggplot(aes(x = minute, y = avg_delay)) +
|
||||||
geom_line()
|
geom_line()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -375,7 +375,7 @@ sched_dep <- flights_dt |>
|
||||||
n = n()
|
n = n()
|
||||||
)
|
)
|
||||||
|
|
||||||
ggplot(sched_dep, aes(minute, avg_delay)) +
|
ggplot(sched_dep, aes(x = minute, y = avg_delay)) +
|
||||||
geom_line()
|
geom_line()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -396,7 +396,7 @@ Always be alert for this sort of pattern whenever you work with data that involv
|
||||||
#| almost all flights are scheduled to depart on multiples of five,
|
#| almost all flights are scheduled to depart on multiples of five,
|
||||||
#| with a few extra at 15, 45, and 55 minutes.
|
#| with a few extra at 15, 45, and 55 minutes.
|
||||||
#| echo: false
|
#| echo: false
|
||||||
ggplot(sched_dep, aes(minute, n)) +
|
ggplot(sched_dep, aes(x = minute, y = n)) +
|
||||||
geom_line()
|
geom_line()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -415,7 +415,7 @@ This, for example, allows us to plot the number of flights per week:
|
||||||
#| weeks of the year (approximately 2,500 flights).
|
#| weeks of the year (approximately 2,500 flights).
|
||||||
flights_dt |>
|
flights_dt |>
|
||||||
count(week = floor_date(dep_time, "week")) |>
|
count(week = floor_date(dep_time, "week")) |>
|
||||||
ggplot(aes(week, n)) +
|
ggplot(aes(x = week, y = n)) +
|
||||||
geom_line() +
|
geom_line() +
|
||||||
geom_point()
|
geom_point()
|
||||||
```
|
```
|
||||||
|
@ -428,7 +428,7 @@ You can use rounding to show the distribution of flights across the course of a
|
||||||
#| since midnight so it's hard to interpret.
|
#| since midnight so it's hard to interpret.
|
||||||
flights_dt |>
|
flights_dt |>
|
||||||
mutate(dep_hour = dep_time - floor_date(dep_time, "day")) |>
|
mutate(dep_hour = dep_time - floor_date(dep_time, "day")) |>
|
||||||
ggplot(aes(dep_hour)) +
|
ggplot(aes(x = dep_hour)) +
|
||||||
geom_freqpoly(binwidth = 60 * 30)
|
geom_freqpoly(binwidth = 60 * 30)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -445,7 +445,7 @@ We can convert that to an `hms` object to get a more useful x-axis:
|
||||||
#| around 12,000 per hour until 8pm, when they rapidly drop again.
|
#| around 12,000 per hour until 8pm, when they rapidly drop again.
|
||||||
flights_dt |>
|
flights_dt |>
|
||||||
mutate(dep_hour = hms::as_hms(dep_time - floor_date(dep_time, "day"))) |>
|
mutate(dep_hour = hms::as_hms(dep_time - floor_date(dep_time, "day"))) |>
|
||||||
ggplot(aes(dep_hour)) +
|
ggplot(aes(x = dep_hour)) +
|
||||||
geom_freqpoly(binwidth = 60 * 30)
|
geom_freqpoly(binwidth = 60 * 30)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
14
factors.qmd
14
factors.qmd
|
@ -147,7 +147,7 @@ Or with a bar chart:
|
||||||
#| A bar chart showing the distribution of race. There are ~2000
|
#| A bar chart showing the distribution of race. There are ~2000
|
||||||
#| records with race "Other", 3000 with race "Black", and other
|
#| records with race "Other", 3000 with race "Black", and other
|
||||||
#| 15,000 with race "White".
|
#| 15,000 with race "White".
|
||||||
ggplot(gss_cat, aes(race)) +
|
ggplot(gss_cat, aes(x = race)) +
|
||||||
geom_bar()
|
geom_bar()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -185,7 +185,7 @@ relig_summary <- gss_cat |>
|
||||||
n = n()
|
n = n()
|
||||||
)
|
)
|
||||||
|
|
||||||
ggplot(relig_summary, aes(tvhours, relig)) +
|
ggplot(relig_summary, aes(x = tvhours, y = relig)) +
|
||||||
geom_point()
|
geom_point()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -202,7 +202,7 @@ We can improve it by reordering the levels of `relig` using `fct_reorder()`.
|
||||||
#| The same scatterplot as above, but now the religion is displayed in
|
#| The same scatterplot as above, but now the religion is displayed in
|
||||||
#| increasing order of tvhours. "Other eastern" has the fewest tvhours
|
#| increasing order of tvhours. "Other eastern" has the fewest tvhours
|
||||||
#| under 2, and "Don't know" has the highest (over 5).
|
#| under 2, and "Don't know" has the highest (over 5).
|
||||||
ggplot(relig_summary, aes(tvhours, fct_reorder(relig, tvhours))) +
|
ggplot(relig_summary, aes(x = tvhours, y = fct_reorder(relig, tvhours))) +
|
||||||
geom_point()
|
geom_point()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -218,7 +218,7 @@ relig_summary |>
|
||||||
mutate(
|
mutate(
|
||||||
relig = fct_reorder(relig, tvhours)
|
relig = fct_reorder(relig, tvhours)
|
||||||
) |>
|
) |>
|
||||||
ggplot(aes(tvhours, relig)) +
|
ggplot(aes(x = tvhours, y = relig)) +
|
||||||
geom_point()
|
geom_point()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -238,7 +238,7 @@ rincome_summary <- gss_cat |>
|
||||||
n = n()
|
n = n()
|
||||||
)
|
)
|
||||||
|
|
||||||
ggplot(rincome_summary, aes(age, fct_reorder(rincome, age))) +
|
ggplot(rincome_summary, aes(x = age, y = fct_reorder(rincome, age))) +
|
||||||
geom_point()
|
geom_point()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -294,7 +294,7 @@ by_age <- gss_cat |>
|
||||||
ggplot(by_age, aes(age, prop, color = marital)) +
|
ggplot(by_age, aes(age, prop, color = marital)) +
|
||||||
geom_line(na.rm = TRUE)
|
geom_line(na.rm = TRUE)
|
||||||
|
|
||||||
ggplot(by_age, aes(age, prop, color = fct_reorder2(marital, age, prop))) +
|
ggplot(by_age, aes(x = age, y = prop, color = fct_reorder2(marital, age, prop))) +
|
||||||
geom_line() +
|
geom_line() +
|
||||||
labs(color = "marital")
|
labs(color = "marital")
|
||||||
```
|
```
|
||||||
|
@ -309,7 +309,7 @@ Combine it with `fct_rev()` if you want them in increasing frequency so that in
|
||||||
#| (~3,000), never married (~5,000), married (~10,000).
|
#| (~3,000), never married (~5,000), married (~10,000).
|
||||||
gss_cat |>
|
gss_cat |>
|
||||||
mutate(marital = marital |> fct_infreq() |> fct_rev()) |>
|
mutate(marital = marital |> fct_infreq() |> fct_rev()) |>
|
||||||
ggplot(aes(marital)) +
|
ggplot(aes(x = marital)) +
|
||||||
geom_bar()
|
geom_bar()
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -972,7 +972,7 @@ Let's first make a function that draws the plot we want:
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
carat_histogram <- function(df) {
|
carat_histogram <- function(df) {
|
||||||
ggplot(df, aes(carat)) + geom_histogram(binwidth = 0.1)
|
ggplot(df, aes(x = carat)) + geom_histogram(binwidth = 0.1)
|
||||||
}
|
}
|
||||||
|
|
||||||
carat_histogram(by_clarity$data[[1]])
|
carat_histogram(by_clarity$data[[1]])
|
||||||
|
|
|
@ -373,7 +373,7 @@ flights2 |>
|
||||||
|
|
||||||
airports |>
|
airports |>
|
||||||
semi_join(flights, join_by(faa == dest)) |>
|
semi_join(flights, join_by(faa == dest)) |>
|
||||||
ggplot(aes(lon, lat)) +
|
ggplot(aes(x = lon, y = lat)) +
|
||||||
borders("state") +
|
borders("state") +
|
||||||
geom_point() +
|
geom_point() +
|
||||||
coord_quickmap()
|
coord_quickmap()
|
||||||
|
@ -394,7 +394,7 @@ flights2 |>
|
||||||
summarize(delay = mean(arr_delay), n = n()) |>
|
summarize(delay = mean(arr_delay), n = n()) |>
|
||||||
filter(n > 5) |>
|
filter(n > 5) |>
|
||||||
inner_join(airports, by = c("dest" = "faa")) |>
|
inner_join(airports, by = c("dest" = "faa")) |>
|
||||||
ggplot(aes(lon, lat)) +
|
ggplot(aes(x = lon, y = lat)) +
|
||||||
borders("state") +
|
borders("state") +
|
||||||
geom_point(aes(size = n, color = delay)) +
|
geom_point(aes(size = n, color = delay)) +
|
||||||
coord_quickmap()
|
coord_quickmap()
|
||||||
|
|
|
@ -235,11 +235,11 @@ You can force them to display by supplying `drop = FALSE` to the appropriate dis
|
||||||
#|
|
#|
|
||||||
#| The same bar chart as the last plot, but now with two values on
|
#| The same bar chart as the last plot, but now with two values on
|
||||||
#| the x-axis, "yes" and "no". There is no bar for the "yes" category.
|
#| the x-axis, "yes" and "no". There is no bar for the "yes" category.
|
||||||
ggplot(health, aes(smoker)) +
|
ggplot(health, aes(x = smoker)) +
|
||||||
geom_bar() +
|
geom_bar() +
|
||||||
scale_x_discrete()
|
scale_x_discrete()
|
||||||
|
|
||||||
ggplot(health, aes(smoker)) +
|
ggplot(health, aes(x = smoker)) +
|
||||||
geom_bar() +
|
geom_bar() +
|
||||||
scale_x_discrete(drop = FALSE)
|
scale_x_discrete(drop = FALSE)
|
||||||
```
|
```
|
||||||
|
|
|
@ -412,7 +412,7 @@ To illustrate the principle, the following three plots have `fig-width` of 4, 6,
|
||||||
```{r}
|
```{r}
|
||||||
#| include: false
|
#| include: false
|
||||||
|
|
||||||
plot <- ggplot(mpg, aes(displ, hwy)) + geom_point()
|
plot <- ggplot(mpg, aes(x = displ, y = hwy)) + geom_point()
|
||||||
```
|
```
|
||||||
|
|
||||||
```{r}
|
```{r}
|
||||||
|
|
|
@ -18,7 +18,7 @@ class <- mpg |> filter(class == params$my_class)
|
||||||
```{r}
|
```{r}
|
||||||
#| message: false
|
#| message: false
|
||||||
|
|
||||||
ggplot(class, aes(displ, hwy)) +
|
ggplot(class, aes(x = displ, y = hwy)) +
|
||||||
geom_point() +
|
geom_point() +
|
||||||
geom_smooth(se = FALSE)
|
geom_smooth(se = FALSE)
|
||||||
```
|
```
|
||||||
|
|
Loading…
Reference in New Issue