Merge branch 'master' of github.com:hadley/r4ds

2015-11-10 17:28:16 -05:00 · 2015-11-10 17:28:16 -05:00 · 1931343cf3
parent e986755666 a9b7f2f3a8
commit 1931343cf3
8 changed files with 179 additions and 29 deletions
--- a/.Rbuildignore
+++ b/.Rbuildignore
@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
--- a/.travis.yml
+++ b/.travis.yml
@ -1,32 +1,47 @@
 language: c
-sudo: required
+sudo: false

-before_install:
-  - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh
-  - chmod 755 ./travis-tool.sh
-  - ./travis-tool.sh bootstrap
+addons:
+  apt:
+    sources:
+      - r-packages-precise
+    packages:
+      - r-base-dev
+      - r-recommended
+      - pandoc
+      - libxml2-dev
+
+env:
+  global:
+    - R_LIBS_USER=$HOME/R/library
+
+cache:
+  directories:
+    - $R_LIBS_USER
+    - vendor/bundle
+    - $HOME/.ccache

 install:
  # Install binary pandoc from Rstudio
-  - export PATH="$HOME/pandoc:$PATH"
  - mkdir $HOME/pandoc
  - curl -O https://s3.amazonaws.com/rstudio-buildtools/pandoc-1.12.3.zip
  - unzip -j pandoc-1.12.3.zip pandoc-1.12.3/linux/debian/x86_64/pandoc
      -d $HOME/pandoc
  - chmod +x $HOME/pandoc/pandoc
-  - pandoc --version
  - rm pandoc-1.12.3.zip
+  - export PATH="$HOME/pandoc:$PATH"
+  - pandoc --version

-  # Install jekyll
-  - travis_retry gem install mime-types
-  - travis_retry gem install jekyll -v 2.5.3
+  # Install ruby gems
+  - bundle install --jobs=3 --retry=3 --deployment

  # Install R packages
-  - ./travis-tool.sh r_binary_install knitr png
-  - ./travis-tool.sh r_install        broom purrr jsonlite ggplot2 dplyr tidyr pryr stringr htmlwidgets htmltools microbenchmark
-  - ./travis-tool.sh github_package   hadley/bookdown garrettgman/DSR hadley/readr gaborcsardi/rcorpora hadley/stringr
+  - mkdir -p "$R_LIBS_USER"
+  - Rscript -e 'if (length(find.package("devtools", quiet = TRUE)) == 0L) { install.packages("devtools", repos = "http://cran.rstudio.com") }'
+  - Rscript -e 'devtools::install_github("hadley/devtools")'
+  - Rscript -e 'devtools::install_deps(repos = "http://cran.rstudio.com", dependencies = TRUE)'

-script: jekyll build
+script: bundle exec jekyll build

 after_success:
  - cp -r figures/ _site/figures
--- a/32
+++ b/32
@ -0,0 +1,32 @@
+Package: r4ds
+Title: R for data science.
+Version: 0.1
+Authors@R: c(
+  person("Hadley", "Wickham", , "hadley@rstudio.com", c("aut", "cre")),
+  person("Garrett", "Grolemund", , "garrett@rstudio.com", "aut")
+  )
+Depends: R (>= 3.1.0)
+URL: https://github.com/hadley/r4ds
+Imports:
+  bookdown,
+  broom,
+  dplyr,
+  DSR,
+  ggplot2,
+  htmltools,
+  htmlwidgets,
+  jsonlite,
+  knitr,
+  microbenchmark,
+  png,
+  pryr,
+  purrr,
+  rcorpora,
+  stringr,
+  tidyr
+Remotes:
+  gaborcsardi/rcorpora,
+  garrettgman/DSR,
+  hadley/bookdown,
+  hadley/purrr,
+  hadley/stringr
--- a/3
+++ b/3
@ -0,0 +1,3 @@
+source 'https://rubygems.org'
+gem 'mime-types'
+gem 'jekyll', '~>2.5.3'
--- a/Gemfile.lock
+++ b/Gemfile.lock
@ -0,0 +1,69 @@
+GEM
+  remote: https://rubygems.org/
+  specs:
+    blankslate (2.1.2.4)
+    classifier-reborn (2.0.4)
+      fast-stemmer (~> 1.0)
+    coffee-script (2.4.1)
+      coffee-script-source
+      execjs
+    coffee-script-source (1.9.1.1)
+    colorator (0.1)
+    execjs (2.6.0)
+    fast-stemmer (1.0.2)
+    ffi (1.9.10)
+    jekyll (2.5.3)
+      classifier-reborn (~> 2.0)
+      colorator (~> 0.1)
+      jekyll-coffeescript (~> 1.0)
+      jekyll-gist (~> 1.0)
+      jekyll-paginate (~> 1.0)
+      jekyll-sass-converter (~> 1.0)
+      jekyll-watch (~> 1.1)
+      kramdown (~> 1.3)
+      liquid (~> 2.6.1)
+      mercenary (~> 0.3.3)
+      pygments.rb (~> 0.6.0)
+      redcarpet (~> 3.1)
+      safe_yaml (~> 1.0)
+      toml (~> 0.1.0)
+    jekyll-coffeescript (1.0.1)
+      coffee-script (~> 2.2)
+    jekyll-gist (1.3.5)
+    jekyll-paginate (1.1.0)
+    jekyll-sass-converter (1.3.0)
+      sass (~> 3.2)
+    jekyll-watch (1.3.0)
+      listen (~> 3.0)
+    kramdown (1.9.0)
+    liquid (2.6.3)
+    listen (3.0.4)
+      rb-fsevent (>= 0.9.3)
+      rb-inotify (>= 0.9)
+    mercenary (0.3.5)
+    mime-types (2.6.2)
+    parslet (1.5.0)
+      blankslate (~> 2.0)
+    posix-spawn (0.3.11)
+    pygments.rb (0.6.3)
+      posix-spawn (~> 0.3.6)
+      yajl-ruby (~> 1.2.0)
+    rb-fsevent (0.9.6)
+    rb-inotify (0.9.5)
+      ffi (>= 0.5.0)
+    redcarpet (3.3.3)
+    safe_yaml (1.0.4)
+    sass (3.4.19)
+    toml (0.1.2)
+      parslet (~> 1.5.0)
+    yajl-ruby (1.2.1)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  jekyll (~> 2.5.3)
+  mime-types
+
+BUNDLED WITH
+   1.10.6
--- a/_config.yml
+++ b/_config.yml
@ -2,4 +2,4 @@ name: R for data science
 markdown: redcarpet
 highlighter: pygments

-exclude: ["CONTRIBUTING.md", "README.md", "book"]
+exclude: ["CONTRIBUTING.md", "README.md", "book", "vendor"]
--- a/lists.Rmd
+++ b/lists.Rmd
@ -183,8 +183,12 @@ map_dbl(x, function(x) mean(x, trim = 0.5))
 Other outputs:

 * `flatten()`
+* `map_int()` vs. `map()` + `flatten_int()`
+* `flatmap()`
 * `dplyr::bind_rows()`

+Need sidebar/callout about predicate functions somewhere. Better to use purrr's underscore variants because they tend to do what you expect.
+
 ### Base equivalents

 * `lapply()` is effectively identical to `map()`. The advantage to using
@ -300,17 +304,41 @@ Other predicate functions: `head_while()`, `tail_while()`, `some()`, `every()`,

 ## Dealing with failure

-Motivation: you try to fit a bunch of models, and they don't all
-succeed/converge. How do you make sure one failure doesn't kill your
-whole process?
+When you start doing many operations with purrr, you'll soon discover that not everything always succeeds. For example, you might be fitting a bunch of more complicated models, and not every model will converge. How do you ensure that one bad apple doesn't ruin the whole barrel?

-Key tool: try()? failwith()? maybe()? (purrr needs to provide a
-definitive answer here)
+Dealing with errors is fundamentally painful because errors are sort of a side-channel to the way that functions usually return values. The best way to handle them is to turn them into a regular output with the `safe()` function. This function is similar to the `try()` function in base R, but instead of sometimes returning the original output and sometimes returning an error, `safe()` always returns the same type of object: a list with elements `result` and `error`. For any given run, one will always be `NULL`, but because the structure is always the same it's easier to deal with.

-Use map_lgl() to create logical vector of success/failure. (Or have
-helper function that wraps? succeeded()? failed()?). Extract successes
-and do something to them. Extract cases that lead to failure (e.g.
-which datasets did models fail to converge for)
+Let's illustrate this with a simple example, `log()`:
+
+```{r}
+safe_log <- safe(log)
+str(safe_log(10))
+str(safe_log("a"))
+```
+
+You can see that when the function succeeds, the `result` element contains the result and the `error` element is empty. When the function fails, the `result` element is empty and the `error` element contains the error.
+
+This makes it natural to work with map:
+
+```{r}
+x <- list(1, 10, "a")
+y <- x %>% map(safe_log)
+str(y)
+```
+
+This output would be easier to work with if we had two lists: one of all the errors and one of all the results.  Fortunately there's a purrr function that allows us to turn a list "inside out", `zip_n()`:
+
+```{r}
+str(y %>% zip_n())
+```
+
+It's up to you how to deal with these errors, but typically you'd start by looking at the values of `x` where `y` is an error, or working with the values of `y` that are ok:
+
+```{r}
+error <- y %>% map_lgl(~is.null(.$result))
+x[error]
+y[!error] %>% map("result")
+```

 Challenge: read_csv all the files in this directory. Which ones failed
 and why? Potentially helpful digression into names() and bind_rows(id
@ -319,13 +347,10 @@ and why? Potentially helpful digression into names() and bind_rows(id
 ```{r, eval = FALSE}
 files <- dir("data", pattern = "\\.csv$")
 files %>%
-  setNames(basename(.)) %>%
-  map(read_csv) %>%
-  bind_rows(id = "name")
+  set_names(basename(.)) %>%
+  map_df(readr::read_csv, .id = "filename")
 ```

-(maybe purrr needs set_names)
-
 ## Multiple inputs

 So far we've focussed on variants that differ primarily in their output. There is a family of useful variants that vary primarily in their input: `map2()`, `map3()` and `map_n()`.
--- a/r4ds.Rproj
+++ b/r4ds.Rproj
@ -14,3 +14,7 @@ LaTeX: XeLaTeX

 AutoAppendNewline: Yes
 StripTrailingWhitespace: Yes
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source