From ac4a138ed65d0d0c43ab127e8efa485684cee169 Mon Sep 17 00:00:00 2001 From: hadley Date: Mon, 3 Oct 2016 07:29:36 -0500 Subject: [PATCH] Use tibble and tribble everywhere Fixes #411 --- model-basics.Rmd | 4 ++-- model-many.Rmd | 2 +- tidy.Rmd | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/model-basics.Rmd b/model-basics.Rmd index c494533..4ad0e61 100644 --- a/model-basics.Rmd +++ b/model-basics.Rmd @@ -327,7 +327,7 @@ You've seen formulas before when using `facet_wrap()` and `facet_grid()`. In R, The majority of modelling functions in R use a standard conversion from formulas to functions. You've seen one simple conversion already: `y ~ x` is translated to `y = a_1 + a_2 * x`. If you want to see what R actually does, you can use the `model_matrix()` function. It takes a data frame and a formula and returns a tibble that defines the model equation: each column in the output is associated with one coefficient in the model, the function is always `y = a_1 * out1 + a_2 * out_2`. For the simplest case of `y ~ x1` this shows us something interesting: ```{r} -df <- frame_data( +df <- tibble::tribble( ~y, ~x1, ~x2, 4, 2, 5, 5, 1, 6 @@ -356,7 +356,7 @@ The following sections expand on how this formula notation works for categorcal Generating a function from a formula is straight forward when the predictor is continuous, but things get a bit more complicated when the predictor is categorical. Imagine you have a formula like `y ~ sex`, where sex could either be male or female. It doesn't make sense to convert that to a formula like `y = x_0 + x_1 * sex` because `sex` isn't a number - you can't multiply it! Instead what R does is convert it to `y = x_0 + x_1 * sex_male` where `sex_male` is one if `sex` is male and zero otherwise: ```{r, echo = FALSE} -df <- frame_data( +df <- tibble::tribble( ~ sex, ~ response, "male", 1, "female", 2, diff --git a/model-many.Rmd b/model-many.Rmd index c7e32fc..ead8b7e 100644 --- a/model-many.Rmd +++ b/model-many.Rmd @@ -376,7 +376,7 @@ df %>% Another example of this pattern is using the `map()`, `map2()`, `pmap()` from purrr. For example, we could take the final example from [Invoking different functions] and rewrite it to use `mutate()`: ```{r} -sim <- tibble::frame_data( +sim <- tibble::tribble( ~f, ~params, "runif", list(min = -1, max = -1), "rnorm", list(sd = 5), diff --git a/tidy.Rmd b/tidy.Rmd index 0a4a1ef..f8244a0 100644 --- a/tidy.Rmd +++ b/tidy.Rmd @@ -206,7 +206,7 @@ As you might have guessed from the common `key` and `value` arguments, `spread() Carefully consider the following example: ```{r, eval = FALSE} - stocks <- data_frame( + stocks <- tibble::tibble( year = c(2015, 2015, 2016, 2016), half = c( 1, 2, 1, 2), return = c(1.88, 0.59, 0.92, 0.17) @@ -232,7 +232,7 @@ As you might have guessed from the common `key` and `value` arguments, `spread() the problem? ```{r} - people <- frame_data( + people <- tibble::tribble( ~name, ~key, ~value, #-----------------|--------|------ "Phillip Woods", "age", 45, @@ -247,7 +247,7 @@ As you might have guessed from the common `key` and `value` arguments, `spread() What are the variables? ```{r} - preg <- frame_data( + preg <- tibble::tribble( ~pregnant, ~male, ~female, "yes", NA, 10, "no", 20, 12 @@ -353,7 +353,7 @@ Changing the representation of a dataset brings up an important subtlety of miss Let's illustrate this idea with a very simple data set: ```{r} -stocks <- data_frame( +stocks <- tibble::tibble( year = c(2015, 2015, 2015, 2015, 2016, 2016, 2016), qtr = c( 1, 2, 3, 4, 2, 3, 4), return = c(1.88, 0.59, 0.35, NA, 0.92, 0.17, 2.66) @@ -397,7 +397,7 @@ stocks %>% There's one other important tool that you should know for working with missing values. Sometimes when a data source has primarily been used for data entry, missing values indicate that the previous value should be carried forward: ```{r} -treatment <- frame_data( +treatment <- tibble::tribble( ~ person, ~ treatment, ~response, "Derrick Whitmore", 1, 7, NA, 2, 10,