From fad2d6c940f9110bc434ae6c435b2bc30289f672 Mon Sep 17 00:00:00 2001
From: Hadley Wickham <h.wickham@gmail.com>
Date: Thu, 20 Oct 2022 14:24:19 -0500
Subject: [PATCH] Proof read alt text

---
 joins.qmd | 81 +++++++++++++++++++++++++++----------------------------
 1 file changed, 39 insertions(+), 42 deletions(-)

diff --git a/joins.qmd b/joins.qmd
index 3441d2d..1ef4d61 100644
--- a/joins.qmd
+++ b/joins.qmd
@@ -93,17 +93,14 @@ These relationships are summarized visually in @fig-flights-relationships.
 #|   Variables making up a primary key are coloured grey, and are connected
 #|   to their corresponding foreign keys with arrows.
 #| fig-alt: >
-#|   Diagram showing the relationships between airports, planes, flights, 
-#|   weather, and airlines datasets from the nycflights13 package. The faa
-#|   variable in the airports data frame is connected to the origin and dest
-#|   variables in the flights data frame. The tailnum variable in the planes
-#|   data frame is connected to the tailnum variable in flights. The
-#|   time_hour and origin variables in the weather data frame are connected
-#|   to the variables with the same name in the flights data frame. And
-#|   finally the carrier variables in the airlines data frame is connected
-#|   to the carrier variable in the flights data frame. There are no direct
-#|   connections between airports, planes, airlines, and weather data 
-#|   frames.
+#|   The relationships between airports, planes, flights, weather, and
+#|   airlines datasets from the nycflights13 package. airports$faa
+#|   is connected to flights$origin and flights$dest. planes$tailnum
+#|   is connected to flights$tailnum. weather$time_hour and
+#|   weather$origin are jointly connected to flights$time_hour and 
+#|   flights$origin. airlines$carrier is connected to flights$carrier.
+#|   There are no direct connections between airports, planes, airlines, 
+#|   and weather data frames.
 knitr::include_graphics("diagrams/relational.png", dpi = 270)
 ```
 
@@ -433,9 +430,9 @@ y <- tribble(
 #|   columns map background colour to key value. The grey columns represent
 #|   the "value" columns that are carried along for the ride. 
 #| fig-alt: >
-#|   x and y are two data frames with 2 columns and 3 rows each. The first
-#|   column in each is the key and the second is the value. The contents of
-#|   these data frames are given in the previous code chunk.
+#|   x and y are two data frames with 2 columns and 3 rows, with contents
+#|   as described in the text. The values of the keys are coloured:
+#|   1 is green, 2 is purple, 3 is orange, and 4 is yellow.
 
 knitr::include_graphics("diagrams/join/setup.png", dpi = 270)
 ```
@@ -453,8 +450,8 @@ The rows and columns in the output are primarily determined by `x`, so the `x` t
 #| fig-alt: >
 #|   x and y are placed at right-angles, with horizonal lines extending 
 #|   from x and vertical lines extending from y. There are 3 rows in x and 
-#|   3 rows in y leading to 9 intersections that represent nine potential
-#|   matches.
+#|   3 rows in y, which leads to nine intersections representing nine
+#|   potential matches.
 
 knitr::include_graphics("diagrams/join/setup2.png", dpi = 270)
 ```
@@ -473,8 +470,9 @@ We'll come back to non-equi joins in @sec-non-equi-joins.
 #|   An inner join matches each row in `x` to the row in `y` that has the
 #|   same value of `key`. Each match becomes a row in the output.
 #| fig-alt: >
-#|   Keys 1 and 2 appear in both x and y, so there values are equal and
-#|   we get a match, indicated by a dot. Each dot corresponds to a row
+#|   x and y are placed at right-angles with lines forming a grid of
+#|   potential matches. Keys 1 and 2 appear in both x and y, so we
+#|   get a match, indicated by a dot. Each dot corresponds to a row
 #|   in the output, so the resulting joined data frame has two rows.
 
 knitr::include_graphics("diagrams/join/inner.png", dpi = 270)
@@ -496,10 +494,11 @@ There are three types of outer joins:
     #|   A visual representation of the left join where every row in `x`
     #|   appears in the output.
     #| fig-alt: >
-    #|   Compared to the inner join, the `y` table gets a new virtual row
-    #|   that will match any row in `x` that doesn't otherwise have a match.
-    #|   This means that the output now has three rows. For key = 3, which
-    #|   matches this virtual row, the value of val_y is NA.
+    #|   Compared to the previous diagram showing an inner join, the y table
+    #|   gets a new virtual row containing NA that will match any row in x
+    #|   that didn't otherwise match. This means that the output now has
+    #|   three rows. For key = 3, which matches this virtual row, val_y takes
+    #|   value NA.
 
     knitr::include_graphics("diagrams/join/left.png", dpi = 270)
     ```
@@ -516,12 +515,9 @@ There are three types of outer joins:
     #|   A visual representation of the right join where every row of `y` 
     #|   appears in the output. 
     #| fig-alt: >
-    #|   Keys 1 and 2 from x are matched to those in y, key 4 is 
-    #|   also carried along to the joined result since it's on the right data
-    #|   frame,  but key 3 from x is not carried along since it's on the left
-    #|   but not on the right. The result is a data frame with 3 rows: keys 
-    #|   1, 2, and 4, all values from val_y, and the corresponding values
-    #|   from val_x for keys 1 and 2 with an NA for key 4, val_x. 
+    #|   Compared to the previous diagram showing a left join, the x table
+    #|   now gains a virtual row so that every row in y gets a match in x.
+    #|   val_x contains NA for the row in y that didn't match x.
 
     knitr::include_graphics("diagrams/join/right.png", dpi = 270)
     ```
@@ -538,6 +534,7 @@ There are three types of outer joins:
     #|   A visual representation of the full join where every row in `x`
     #|   and `y` appears in the output.
     #| fig-alt: >
+    #|   Now both x and y have a virtual row that always matches.
     #|   The result has 4 rows: keys 1, 2, 3, and 4 with all values 
     #|   from val_x and val_y, however key 2, val_y and key 4, val_x are NAs
     #|   since those keys don't have a match in the other data frames.
@@ -561,10 +558,10 @@ However, this is not a great representation because while it might jog your memo
 #|   and y, with x on the right and y on the left. Shading indicates the
 #|   result of the join. 
 #|
-#|   Inner join: Only intersection is shaded. 
+#|   Inner join: The intersection is shaded.
 #|   Full join: Everything is shaded. 
 #|   Left join: All of x is shaded.
-#|   Right: All of y is shaded.
+#|   Right join: All of y is shaded.
 
 knitr::include_graphics("diagrams/join/venn.png", dpi = 270)
 ```
@@ -690,11 +687,9 @@ This means that filtering joins never duplicate rows like mutating joins do.
 #|   In a semi-join it only matters that there is a match; otherwise
 #|   values in `y` don't affect the output.
 #| fig-alt: >
-#|   Diagram of a semi join. Data frame x is on the left and has two columns 
-#|   (key and val_x) with keys 1, 2, and 3. Diagram y is on the right and also 
-#|   has two columns (key and val_y) with keys 1, 2, and 4. Semi joining these 
-#|   two results in a data frame with two rows and two columns (key and val_x), 
-#|   with keys 1 and 2 (the only keys that match between the two data frames).
+#|   A join diagram with old friends x and y. In a semi join, only the 
+#|   presence of a match matters so the output contains the same columns
+#|   as x.
 
 knitr::include_graphics("diagrams/join/semi.png", dpi = 270)
 ```
@@ -707,11 +702,8 @@ knitr::include_graphics("diagrams/join/semi.png", dpi = 270)
 #|   An anti-join is the inverse of a semi-join, dropping rows from `x`
 #|   that have a match in `y`.
 #| fig-alt: >
-#|   Diagram of an anti join. Data frame x is on the left and has two columns 
-#|   (key and val_x) with keys 1, 2, and 3. Diagram y is on the right and also 
-#|   has two columns (key and val_y) with keys 1, 2, and 4. Anti joining these 
-#|   two results in a data frame with one row and two columns (key and val_x), 
-#|   with keys 3 only (the only key in x that is not in y).
+#|   An anti-join is the inverse of a semi-join so matches are drawn with
+#|   red lines indicating that they will be dropped from the output.
 
 knitr::include_graphics("diagrams/join/anti.png", dpi = 270)
 ```
@@ -737,7 +729,7 @@ x |> left_join(y, by = "key", keep = TRUE)
 #|   A join diagram showing an inner join betwen x and y. The result
 #|   now includes four columns: key.x, val_x, key.y, and val_y. The
 #|   values of key.x and key.y are identical, which is why we usually
-#|   omit one.
+#|   only show one.
 #| echo: false
 #| out-width: ~
 
@@ -807,7 +799,8 @@ Inequality joins use `<`, `<=`, `>=`, or `>` to restrict the set of possible mat
 #| out-width: ~
 #| fig-cap: >
 #|   An inequality join where `x` is joined to `y` on rows where the key 
-#|   of `x` is less than the key of `y`.
+#|   of `x` is less than the key of `y`. This makes a triangular
+#|   shape in the top-left corner.
 knitr::include_graphics("diagrams/join/lt.png", dpi = 270)
 ```
 
@@ -833,6 +826,10 @@ For example `join_by(closest(x <= y))` matches the smallest `y` that's greater t
 #| fig-cap: >
 #|   A following join is similar to a greater-than-or-equal inequality join
 #|   but only matches the first value.
+#| fig-alt: >
+#|   A rolling join is a subset of an inequality join so some matches are
+#|   grayed out indicating that they're not used because they're not the 
+#|   "closest".
 knitr::include_graphics("diagrams/join/closest.png", dpi = 270)
 ```