From 3774e95a4ad522350d40473f14700e084aca3073 Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 07:50:20 -0600 Subject: [PATCH 01/14] Start updating travis build script --- .travis.yml | 49 ++++++++++++++++++----------------- DESCRIPTION | 32 +++++++++++++++++++++++ Gemfile | 5 ++++ Gemfile.lock | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+), 24 deletions(-) create mode 100644 DESCRIPTION create mode 100644 Gemfile create mode 100644 Gemfile.lock diff --git a/.travis.yml b/.travis.yml index 3bb044c..82244d5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,32 +1,33 @@ language: c -sudo: required +sudo: false -before_install: - - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh - - chmod 755 ./travis-tool.sh - - ./travis-tool.sh bootstrap +addons: + apt: + sources: + - r-packages-precise + packages: + - r-base-dev + - r-recommended + - pandoc + - libxml2-dev + +env: + global: + - R_LIBS_USER=~/R/library + +cache: + directories: + - $R_LIBS_USER + - vendor/bundle install: - # Install binary pandoc from Rstudio - - export PATH="$HOME/pandoc:$PATH" - - mkdir $HOME/pandoc - - curl -O https://s3.amazonaws.com/rstudio-buildtools/pandoc-1.12.3.zip - - unzip -j pandoc-1.12.3.zip pandoc-1.12.3/linux/debian/x86_64/pandoc - -d $HOME/pandoc - - chmod +x $HOME/pandoc/pandoc - - pandoc --version - - rm pandoc-1.12.3.zip + - bundle install --jobs=3 --retry=3 + - mkdir -p "$R_LIBS_USER" + - Rscript -e 'if (length(find.package("devtools", quiet = TRUE)) == 0L) { install.packages("devtools", repos = "http://cran.rstudio.com") }' + - Rscript -e 'devtools::update_packages("devtools", repos = "http://cran.rstudio.com")' + - Rscript -e 'devtools::install_deps(repos = "http://cran.rstudio.com", dependencies = TRUE)' - # Install jekyll - - travis_retry gem install mime-types - - travis_retry gem install jekyll -v 2.5.3 - - # Install R packages - - ./travis-tool.sh r_binary_install knitr png - - 
./travis-tool.sh r_install broom purrr jsonlite ggplot2 dplyr tidyr pryr stringr htmlwidgets htmltools microbenchmark - - ./travis-tool.sh github_package hadley/bookdown garrettgman/DSR hadley/readr gaborcsardi/rcorpora hadley/stringr - -script: jekyll build +script: bundle exec jekyll build after_success: - cp -r figures/ _site/figures diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..d37afcb --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,32 @@ +Package: r4ds +Title: R for data science. +Version: 0.1 +Authors@R: c( + person("Hadley", "Wickham", , "hadley@rstudio.com", c("aut", "cre")), + person("Garrett", "Grolemund", , "garrett@rstudio.com", "aut") + ) +Depends: R (>= 3.1.0) +URL: https://github.com/hadley/r4ds +Imports: + bookdown, + broom, + dplyr, + DSR, + ggplot2, + htmltools, + htmlwidgets, + jsonlite, + knitr, + microbenchmark, + png, + pryr, + purrr, + rcorpora, + stringr, + tidyr +Remotes: + hadley/bookdown, + garrettgman/DSR, + hadley/readr, + gaborcsardi/rcorpora, + hadley/stringr diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..7f2d169 --- /dev/null +++ b/Gemfile @@ -0,0 +1,5 @@ +source 'https://rubygems.org' +gem 'mime-types' +gem 'jekyll', '~>2.5.3' +gem 'dpl' +gem 'jmespath' diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..97afad8 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,73 @@ +GEM + remote: https://rubygems.org/ + specs: + blankslate (2.1.2.4) + classifier-reborn (2.0.4) + fast-stemmer (~> 1.0) + coffee-script (2.4.1) + coffee-script-source + execjs + coffee-script-source (1.9.1.1) + colorator (0.1) + dpl (1.8.7) + execjs (2.6.0) + fast-stemmer (1.0.2) + ffi (1.9.10) + jekyll (2.5.3) + classifier-reborn (~> 2.0) + colorator (~> 0.1) + jekyll-coffeescript (~> 1.0) + jekyll-gist (~> 1.0) + jekyll-paginate (~> 1.0) + jekyll-sass-converter (~> 1.0) + jekyll-watch (~> 1.1) + kramdown (~> 1.3) + liquid (~> 2.6.1) + mercenary (~> 0.3.3) + pygments.rb (~> 0.6.0) + redcarpet (~> 3.1) + 
safe_yaml (~> 1.0) + toml (~> 0.1.0) + jekyll-coffeescript (1.0.1) + coffee-script (~> 2.2) + jekyll-gist (1.3.5) + jekyll-paginate (1.1.0) + jekyll-sass-converter (1.3.0) + sass (~> 3.2) + jekyll-watch (1.3.0) + listen (~> 3.0) + jmespath (1.1.3) + kramdown (1.9.0) + liquid (2.6.3) + listen (3.0.4) + rb-fsevent (>= 0.9.3) + rb-inotify (>= 0.9) + mercenary (0.3.5) + mime-types (2.6.2) + parslet (1.5.0) + blankslate (~> 2.0) + posix-spawn (0.3.11) + pygments.rb (0.6.3) + posix-spawn (~> 0.3.6) + yajl-ruby (~> 1.2.0) + rb-fsevent (0.9.6) + rb-inotify (0.9.5) + ffi (>= 0.5.0) + redcarpet (3.3.3) + safe_yaml (1.0.4) + sass (3.4.19) + toml (0.1.2) + parslet (~> 1.5.0) + yajl-ruby (1.2.1) + +PLATFORMS + ruby + +DEPENDENCIES + dpl + jekyll (~> 2.5.3) + jmespath + mime-types + +BUNDLED WITH + 1.10.6 From ca5ad1e5c9cffb8d7aad756ec31e2b03a3d11ed7 Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 07:52:44 -0600 Subject: [PATCH 02/14] Install dev version of purrr --- DESCRIPTION | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d37afcb..bd820b9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,8 +25,9 @@ Imports: stringr, tidyr Remotes: - hadley/bookdown, - garrettgman/DSR, - hadley/readr, gaborcsardi/rcorpora, + garrettgman/DSR, + hadley/bookdown, + hadley/purrr, + hadley/readr, hadley/stringr From 8ae551b167c6076fbbe57bd7645d50cf0858f5bc Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 08:53:41 -0600 Subject: [PATCH 03/14] Install dev version of devtools --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 82244d5..9105946 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,7 @@ install: - bundle install --jobs=3 --retry=3 - mkdir -p "$R_LIBS_USER" - Rscript -e 'if (length(find.package("devtools", quiet = TRUE)) == 0L) { install.packages("devtools", repos = "http://cran.rstudio.com") }' - - Rscript -e 
'devtools::update_packages("devtools", repos = "http://cran.rstudio.com")' + - Rscript -e 'devtools::install_gitihub("hadley/devtools")' - Rscript -e 'devtools::install_deps(repos = "http://cran.rstudio.com", dependencies = TRUE)' script: bundle exec jekyll build From db4ec4b57111436c64e024ac0895c0ef9d390fd6 Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 08:56:01 -0600 Subject: [PATCH 04/14] Rstudio writes post-package conversion --- .Rbuildignore | 2 ++ r4ds.Rproj | 4 ++++ 2 files changed, 6 insertions(+) create mode 100644 .Rbuildignore diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..91114bf --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,2 @@ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/r4ds.Rproj b/r4ds.Rproj index 4f10a56..f6cd4b6 100644 --- a/r4ds.Rproj +++ b/r4ds.Rproj @@ -14,3 +14,7 @@ LaTeX: XeLaTeX AutoAppendNewline: Yes StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source From 0624c60690d93cc9c7fa0410f6101f06200f1429 Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 09:02:51 -0600 Subject: [PATCH 05/14] Correct spelling --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9105946..535df49 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,7 @@ install: - bundle install --jobs=3 --retry=3 - mkdir -p "$R_LIBS_USER" - Rscript -e 'if (length(find.package("devtools", quiet = TRUE)) == 0L) { install.packages("devtools", repos = "http://cran.rstudio.com") }' - - Rscript -e 'devtools::install_gitihub("hadley/devtools")' + - Rscript -e 'devtools::install_github("hadley/devtools")' - Rscript -e 'devtools::install_deps(repos = "http://cran.rstudio.com", dependencies = TRUE)' script: bundle exec jekyll build From 11b6e828521f0ce17ae1d3916ee18be3f08ef60e Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 10:14:13 -0600 Subject: [PATCH 06/14] Minor caching tweaks --- 
.travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 535df49..e286a61 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,12 +13,13 @@ addons: env: global: - - R_LIBS_USER=~/R/library + - R_LIBS_USER=$HOME/R/library cache: directories: - $R_LIBS_USER - vendor/bundle + - $HOME/.ccache install: - bundle install --jobs=3 --retry=3 From 259806d04dc9fd0c7f1276b754f03c19e18fa7a0 Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 10:44:33 -0600 Subject: [PATCH 07/14] Install pandoc from RStudio --- .travis.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.travis.yml b/.travis.yml index e286a61..5681b56 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,7 +22,19 @@ cache: - $HOME/.ccache install: + # Install binary pandoc from Rstudio + - export PATH="$HOME/pandoc:$PATH" + - mkdir $HOME/pandoc + - curl -O https://s3.amazonaws.com/rstudio-buildtools/pandoc-1.12.3.zip + - unzip -j pandoc-1.12.3.zip pandoc-1.12.3/linux/debian/x86_64/pandoc + -d $HOME/pandoc + - chmod +x $HOME/pandoc/pandoc + - pandoc --version + + # Install ruby gems - bundle install --jobs=3 --retry=3 + + # Install R packages - mkdir -p "$R_LIBS_USER" - Rscript -e 'if (length(find.package("devtools", quiet = TRUE)) == 0L) { install.packages("devtools", repos = "http://cran.rstudio.com") }' - Rscript -e 'devtools::install_github("hadley/devtools")' From 6c882aba69ee7e33dad303b8b91faf9d1823dc96 Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 10:48:47 -0600 Subject: [PATCH 08/14] Install gems locally so they get cached --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5681b56..e468b5b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,7 +32,7 @@ install: - pandoc --version # Install ruby gems - - bundle install --jobs=3 --retry=3 + - bundle install --jobs=3 --retry=3 --deployment # Install R packages - mkdir -p "$R_LIBS_USER" From 
059e8212e82d2c43f9f4eea64ac3f3663f1d2e22 Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 10:52:08 -0600 Subject: [PATCH 09/14] Don't need readr from github anymore --- DESCRIPTION | 1 - 1 file changed, 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bd820b9..cb96c4c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,5 +29,4 @@ Remotes: garrettgman/DSR, hadley/bookdown, hadley/purrr, - hadley/readr, hadley/stringr From 5865a2dcff4aecc60699e55e54630b17a814584d Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 10:54:37 -0600 Subject: [PATCH 10/14] Install more deployment deps so they get cached --- Gemfile | 3 +++ Gemfile.lock | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/Gemfile b/Gemfile index 7f2d169..a1b837b 100644 --- a/Gemfile +++ b/Gemfile @@ -3,3 +3,6 @@ gem 'mime-types' gem 'jekyll', '~>2.5.3' gem 'dpl' gem 'jmespath' +gem 'aws-sdk' +gem 'aws-sdk-core' +gem 'aws-sdk-resources' diff --git a/Gemfile.lock b/Gemfile.lock index 97afad8..39c3a53 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,13 @@ GEM remote: https://rubygems.org/ specs: + aws-sdk (1.50.0) + json (~> 1.4) + nokogiri (>= 1.4.4) + aws-sdk-core (2.1.35) + jmespath (~> 1.0) + aws-sdk-resources (2.1.35) + aws-sdk-core (= 2.1.35) blankslate (2.1.2.4) classifier-reborn (2.0.4) fast-stemmer (~> 1.0) @@ -37,6 +44,7 @@ GEM jekyll-watch (1.3.0) listen (~> 3.0) jmespath (1.1.3) + json (1.8.1) kramdown (1.9.0) liquid (2.6.3) listen (3.0.4) @@ -44,6 +52,9 @@ GEM rb-inotify (>= 0.9) mercenary (0.3.5) mime-types (2.6.2) + mini_portile (0.6.0) + nokogiri (1.6.3.1) + mini_portile (= 0.6.0) parslet (1.5.0) blankslate (~> 2.0) posix-spawn (0.3.11) @@ -64,6 +75,9 @@ PLATFORMS ruby DEPENDENCIES + aws-sdk + aws-sdk-core + aws-sdk-resources dpl jekyll (~> 2.5.3) jmespath From 5b0acf27c024b5cc8464a89361f6647f1b3234cb Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 10:57:03 -0600 Subject: [PATCH 11/14] Exclude vendor dir from jekyll --- _config.yml 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_config.yml b/_config.yml index b013c21..317ff8b 100644 --- a/_config.yml +++ b/_config.yml @@ -2,4 +2,4 @@ name: R for data science markdown: redcarpet highlighter: pygments -exclude: ["CONTRIBUTING.md", "README.md", "book"] +exclude: ["CONTRIBUTING.md", "README.md", "book", "vendor"] From f30293c27db4cb79f4f5d4fde29e130139905108 Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 11:06:27 -0600 Subject: [PATCH 12/14] Clean up after pandoc install --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index e468b5b..0ea1e37 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,12 +23,13 @@ cache: install: # Install binary pandoc from Rstudio - - export PATH="$HOME/pandoc:$PATH" - mkdir $HOME/pandoc - curl -O https://s3.amazonaws.com/rstudio-buildtools/pandoc-1.12.3.zip - unzip -j pandoc-1.12.3.zip pandoc-1.12.3/linux/debian/x86_64/pandoc -d $HOME/pandoc - chmod +x $HOME/pandoc/pandoc + - rm pandoc-1.12.3.zip + - export PATH="$HOME/pandoc:$PATH" - pandoc --version # Install ruby gems From d413489950a10ef3dca75b12fb215c4755176ebe Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 11:06:41 -0600 Subject: [PATCH 13/14] Installing deploy deps doesn't seem to save any time --- Gemfile | 5 ----- Gemfile.lock | 18 ------------------ 2 files changed, 23 deletions(-) diff --git a/Gemfile b/Gemfile index a1b837b..29311e0 100644 --- a/Gemfile +++ b/Gemfile @@ -1,8 +1,3 @@ source 'https://rubygems.org' gem 'mime-types' gem 'jekyll', '~>2.5.3' -gem 'dpl' -gem 'jmespath' -gem 'aws-sdk' -gem 'aws-sdk-core' -gem 'aws-sdk-resources' diff --git a/Gemfile.lock b/Gemfile.lock index 39c3a53..73c5a0d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,13 +1,6 @@ GEM remote: https://rubygems.org/ specs: - aws-sdk (1.50.0) - json (~> 1.4) - nokogiri (>= 1.4.4) - aws-sdk-core (2.1.35) - jmespath (~> 1.0) - aws-sdk-resources (2.1.35) - aws-sdk-core (= 
2.1.35) blankslate (2.1.2.4) classifier-reborn (2.0.4) fast-stemmer (~> 1.0) @@ -16,7 +9,6 @@ GEM execjs coffee-script-source (1.9.1.1) colorator (0.1) - dpl (1.8.7) execjs (2.6.0) fast-stemmer (1.0.2) ffi (1.9.10) @@ -43,8 +35,6 @@ GEM sass (~> 3.2) jekyll-watch (1.3.0) listen (~> 3.0) - jmespath (1.1.3) - json (1.8.1) kramdown (1.9.0) liquid (2.6.3) listen (3.0.4) @@ -52,9 +42,6 @@ GEM rb-inotify (>= 0.9) mercenary (0.3.5) mime-types (2.6.2) - mini_portile (0.6.0) - nokogiri (1.6.3.1) - mini_portile (= 0.6.0) parslet (1.5.0) blankslate (~> 2.0) posix-spawn (0.3.11) @@ -75,12 +62,7 @@ PLATFORMS ruby DEPENDENCIES - aws-sdk - aws-sdk-core - aws-sdk-resources - dpl jekyll (~> 2.5.3) - jmespath mime-types BUNDLED WITH From a9b7f2f3a81b87ad900af39f2a2671dd1fb1c0be Mon Sep 17 00:00:00 2001 From: hadley Date: Tue, 10 Nov 2015 11:12:09 -0600 Subject: [PATCH 14/14] More work on lists chapter --- lists.Rmd | 53 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/lists.Rmd b/lists.Rmd index 7477758..c40f6fd 100644 --- a/lists.Rmd +++ b/lists.Rmd @@ -183,8 +183,12 @@ map_dbl(x, function(x) mean(x, trim = 0.5)) Other outputs: * `flatten()` +* `map_int()` vs. `map()` + `flatten_int()` +* `flatmap()` * `dplyr::bind_rows()` +Need sidebar/callout about predicate functions somewhere. Better to use purrr's underscore variants because they tend to do what you expect, and + ### Base equivalents * `lapply()` is effectively identical to `map()`. The advantage to using @@ -300,17 +304,41 @@ Other predicate functions: `head_while()`, `tail_while()`, `some()`, `every()`, ## Dealing with failure -Motivation: you try to fit a bunch of models, and they don't all -succeed/converge. How do you make sure one failure doesn't kill your -whole process? +When you start doing many operations with purrr, you'll soon discover that not everything always succeeds. 
For example, you might be fitting a bunch of more complicated models, and not every model will converge. How do you ensure that one bad apple doesn't ruin the whole barrel? -Key tool: try()? failwith()? maybe()? (purrr needs to provide a -definitive answer here) +Dealing with errors is fundamentally painful because errors are sort of a side-channel to the way that functions usually return values. The best way to handle them is to turn them into a regular output with the `safe()` function. This function is similar to the `try()` function in base R, but instead of sometimes returning the original output and sometimes returning an error, `safe()` always returns the same type of object: a list with elements `result` and `error`. For any given run, one will always be `NULL`, but because the structure is always the same it's easier to deal with. -Use map_lgl() to create logical vector of success/failure. (Or have -helper function that wraps? succeeded()? failed()?). Extract successes -and do something to them. Extract cases that lead to failure (e.g. -which datasets did models fail to converge for) +Let's illustrate this with a simple example: `log()`: + +```{r} +safe_log <- safe(log) +str(safe_log(10)) +str(safe_log("a")) +``` + +You can see when the function succeeds the result element contains the result and the error element is empty. When the function fails, the result element is empty and the error element contains the error. + +This makes it natural to work with map: + +```{r} +x <- list(1, 10, "a") +y <- x %>% map(safe_log) +str(y) +``` + +This output would be easier to work with if we had two lists: one of all the errors and one of all the results.
Fortunately there's a purrr function that allows us to turn a list "inside out", `zip_n()`: + +```{r} +str(y %>% zip_n()) +``` + +It's up to you how to deal with these errors, but typically you'd start by looking at the values of `x` where `y` is an error or working with the values of y that are ok: + +```{r} +error <- y %>% map_lgl(~is.null(.$result)) +x[error] +y[!error] %>% map("result") +``` Challenge: read_csv all the files in this directory. Which ones failed and why? Potentially helpful digression into names() and bind_rows(id @@ -319,13 +347,10 @@ and why? Potentially helpful digression into names() and bind_rows(id ```{r, eval = FALSE} files <- dir("data", pattern = "\\.csv$") files %>% - setNames(basename(.)) %>% - map(read_csv) %>% - bind_rows(id = "name") + set_names(basename(.)) %>% + map_df(readr::read_csv, .id = "filename") %>% ``` -(maybe purrr needs set_names) - ## Multiple inputs So far we've focussed on variants that differ primarily in their output. There is a family of useful variants that vary primarily in their input: `map2()`, `map3()` and `map_n()`.