fixes merge conflicts in model.Rmd

This commit is contained in:
Garrett 2016-03-31 15:55:21 -04:00
commit 546fcac17d
101 changed files with 2966 additions and 12440 deletions

15
.gitignore vendored
View File

@ -1,14 +1,9 @@
.Rproj.user
.Rhistory
_site
cache
temp.Rmd
/*.html
*_cache
*_files
figures
.Rapp.history
_main.Rmd
book_assets
.bundle
vendor
_main.rds
_book
*.html
search_index.json
libs

View File

@ -1,55 +1,21 @@
language: c
sudo: false
language: R
cache:
packages: true
directories:
- _book
addons:
apt:
sources:
- r-packages-precise
packages:
- r-base-dev
- r-recommended
- libxml2-dev
env:
global:
- R_LIBS_USER=$HOME/R/library
cache:
directories:
- $R_LIBS_USER
- vendor/bundle
- $HOME/.ccache
install:
# Install binary pandoc from Rstudio
- mkdir $HOME/pandoc
- curl -O https://s3.amazonaws.com/rstudio-buildtools/pandoc-1.12.3.zip
- unzip -j pandoc-1.12.3.zip pandoc-1.12.3/linux/debian/x86_64/pandoc
-d $HOME/pandoc
- chmod +x $HOME/pandoc/pandoc
- rm pandoc-1.12.3.zip
- export PATH="$HOME/pandoc:$PATH"
- pandoc --version
# Install ruby gems
- bundle install --jobs=3 --retry=3 --deployment
# Install R packages
- mkdir -p "$R_LIBS_USER"
- Rscript -e 'if (length(find.package("devtools", quiet = TRUE)) == 0L) { install.packages("devtools", repos = "http://cran.rstudio.com") }'
- Rscript -e 'devtools::install_github("hadley/devtools")'
- Rscript -e 'devtools::install_deps(repos = "http://cran.rstudio.com", dependencies = TRUE)'
script: bundle exec jekyll build
after_success:
- cp -r figures/ _site/figures
script: ./_build.sh
deploy:
skip_cleanup: true
provider: s3
access_key_id: AKIAJ6PXDYWD72R6HBYA
secret_access_key:
secure: "KB6D4dRFyqABOUBC6q6CTI7WZQ+4kFOSDWNQFAbXJQR4TzR8J6uddAiSZyG8T1/8z+9Lm1VK417Zi0dGm3r3epbSnLClitBetvE11DoByomK+ey+NJ0MdXuXbFCJhX9l+8QDbDRLd/b2MEr36JXNaNQaLf5wdHImVVfcCm5STAIOM42plYMvz4Uhao+VjIKo+0IqiGHQHsNcU4qQXS4jd4FtO/t1xCwa7SgH0wwV2yJmeh8mM7QpmUEpBcZTHDvqZu6BitxtkYQDCh1iuBwhbPlYug/WOtyHmKYgU/c3+C+xW4OLv10OsE+eK6noEzIXQ80sPIyKMpkn+9P+7MnoRU/oZTXmYJOuXE5mvy+CiJ4TzZZxzB/g8HzklRRI4eFBmJ/zTTMmJMwBdbUhCXepARe4gr7pDFKhSTXvBVxljJBrkiGz6W1JeZ9nKzUbuIlWNJ9aaYM2UDMbRef7xyKlKbBNw1+90aTTW8Jo+0Sz3/R7daBTcnr0Bszg4QCaOMoxJJF/Ty/tTHiComAt/kNRqlSiU2g/Ch0jOz5TRV3c29OjQQ/a9ftf5pqlvgStwjjszgHQfRrd4mxGq2E/1gkPGL7ada+TWPAVjCc8HtPGK/36IjSccFB6qGkwTFf3uOBmAC2XVnJJlwG8v20nL5ZZwpCCbQANeQq/ILQsYUmk7RM="
bucket: r4ds.had.co.nz
local-dir: _site
skip_cleanup: true
local-dir: _book

View File

@ -19,6 +19,7 @@ Imports:
jpeg,
jsonlite,
Lahman,
lubridate,
knitr,
maps,
microbenchmark,
@ -29,6 +30,7 @@ Imports:
readr,
rcorpora,
stringr,
tibble,
tidyr
Remotes:
gaborcsardi/rcorpora,
@ -36,6 +38,5 @@ Remotes:
hadley/purrr,
hadley/stringr,
hadley/ggplot2,
hadley/bookdown,
hadley/nycflights13,
yihui/knitr
rstudio/bookdown

View File

@ -1,3 +0,0 @@
source 'https://rubygems.org'
gem 'mime-types'
gem 'jekyll', '~>2.5.3'

View File

@ -1,69 +0,0 @@
GEM
remote: https://rubygems.org/
specs:
blankslate (2.1.2.4)
classifier-reborn (2.0.4)
fast-stemmer (~> 1.0)
coffee-script (2.4.1)
coffee-script-source
execjs
coffee-script-source (1.9.1.1)
colorator (0.1)
execjs (2.6.0)
fast-stemmer (1.0.2)
ffi (1.9.10)
jekyll (2.5.3)
classifier-reborn (~> 2.0)
colorator (~> 0.1)
jekyll-coffeescript (~> 1.0)
jekyll-gist (~> 1.0)
jekyll-paginate (~> 1.0)
jekyll-sass-converter (~> 1.0)
jekyll-watch (~> 1.1)
kramdown (~> 1.3)
liquid (~> 2.6.1)
mercenary (~> 0.3.3)
pygments.rb (~> 0.6.0)
redcarpet (~> 3.1)
safe_yaml (~> 1.0)
toml (~> 0.1.0)
jekyll-coffeescript (1.0.1)
coffee-script (~> 2.2)
jekyll-gist (1.3.5)
jekyll-paginate (1.1.0)
jekyll-sass-converter (1.3.0)
sass (~> 3.2)
jekyll-watch (1.3.0)
listen (~> 3.0)
kramdown (1.9.0)
liquid (2.6.3)
listen (3.0.4)
rb-fsevent (>= 0.9.3)
rb-inotify (>= 0.9)
mercenary (0.3.5)
mime-types (2.6.2)
parslet (1.5.0)
blankslate (~> 2.0)
posix-spawn (0.3.11)
pygments.rb (0.6.3)
posix-spawn (~> 0.3.6)
yajl-ruby (~> 1.2.0)
rb-fsevent (0.9.6)
rb-inotify (0.9.5)
ffi (>= 0.5.0)
redcarpet (3.3.3)
safe_yaml (1.0.4)
sass (3.4.19)
toml (0.1.2)
parslet (~> 1.5.0)
yajl-ruby (1.2.1)
PLATFORMS
ruby
DEPENDENCIES
jekyll (~> 2.5.3)
mime-types
BUNDLED WITH
1.10.6

View File

@ -3,17 +3,14 @@
This is code and text behind the [R for data science](http://r4ds.had.co.nz)
book.
The site is built using [bookdown]
The site is built using [bookdown](https://github.com/rstudio/bookdown)
The R packages used in this book can be installed via
```{r}
devtools::install_github("yihui/knitr")
devtools::install_github("rstudio/bookdown")
devtools::install_github("hadley/r4ds")
```
jekyll, with a custom plugin to render `.rmd` files with
knitr and pandoc. To create the site, you need:
To create the site, you also need:
* jekyll gem: `gem install jekyll`
* bookdown: `install_github("hadley/bookdown")`
* [pandoc](http://johnmacfarlane.net/pandoc/)
* [knitr](http://yihui.name/knitr/): `install.packages("knitr")`

31
_bookdown.yml Normal file
View File

@ -0,0 +1,31 @@
new_session: yes
rmd_files: [
"index.rmd",
"intro.Rmd",
"understand.Rmd",
"visualize.Rmd",
"transform.Rmd",
"model.Rmd",
"variation.Rmd",
"work.Rmd",
"import.Rmd",
"tidy.Rmd",
"relational-data.Rmd",
"strings.Rmd",
"datetimes.Rmd",
"program.Rmd",
"pipes.Rmd",
"functions.Rmd",
"data-structures.Rmd",
"iteration.Rmd",
"hierarchy.Rmd",
"science.Rmd",
"model-vis.Rmd",
"model-assess.Rmd",
"communicate.Rmd",
"rmarkdown.Rmd",
"shiny.Rmd",
]
before_chapter_script: "_common.R"

2
_build.sh Executable file
View File

@ -0,0 +1,2 @@
#!/usr/bin/env Rscript
bookdown::render_book("index.rmd")

10
_common.R Normal file
View File

@ -0,0 +1,10 @@
set.seed(1014)
options(digits = 3)
knitr::opts_chunk$set(
comment = "#>",
collapse = TRUE,
cache = TRUE
)
options(dplyr.print_min = 6, dplyr.print_max = 6)

View File

@ -1,25 +0,0 @@
name: R for data science
markdown: redcarpet
highlighter: pygments
exclude: ["CONTRIBUTING.md", "README.md", "book", "vendor"]
# rmd_files: [
# "index.Rmd",
# "intro.Rmd",
# "visualize.Rmd",
# "transform.Rmd",
# "tidy.Rmd",
# "model.Rmd",
# "import.Rmd",
# "eda.Rmd",
# "rmarkdown.Rmd",
# "shiny.Rmd",
# "data-structures.Rmd",
# "functions.Rmd",
# "strings.Rmd",
# "datetimes.Rmd",
# "lists.Rmd",
# "model-vis.Rmd",
# "model-assess.Rmd",
# ]

BIN
_includes/.DS_Store vendored

Binary file not shown.

View File

@ -1,30 +0,0 @@
<li><a href="intro.html">Introduction</a></li>
<li class="dropdown-header"><a href="understand.html">Understand your data</a></li>
<li><a href="visualize.html">Visualize</a></li>
<li><a href="transform.html">Transform</a></li>
<li><a href="model.html">Model</a></li>
<li><a href="variation.html">Variation</a></li>
<li class="dropdown-header"><a href="work.html">Work with your data</a></li>
<li><a href="import.html">Import</a></li>
<li><a href="tidy.html">Tidy</a></li>
<li><a href="relational-data.html">Relational data</a></li>
<li><a href="strings.html">Strings</a></li>
<li><a href="datetimes.html">Dates and times</a></li>
<li class="dropdown-header"><a href="program.html">Save time with programming</a></li>
<li><a href="functions.html">Express yourself with code</a></li>
<li><a href="data-structures.html">Data structures</a></li>
<li><a href="lists.html">Lists and functional programming</a></li>
<li><a href="robust-code.html">Robust code</a></li>
<li class="dropdown-header"><a href="science.html">Do science with data</a></li>
<li><a href="model-vis.html">Discover</a></li>
<li><a href="model-assess.html">Predict</a></li>
<li>Test</li>
<li class="dropdown-header"><a href="communicate.html">Communicate your work</a></a></li>
<li><a href="rmarkdown.html">R Markdown</a></li>
<li><a href="shiny.html">Shiny</a></li>

View File

@ -1,82 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>{{page.title}} &middot; R for Data Science</title>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link href="www/bootstrap.min.css" rel="stylesheet">
<link href="www/highlight.css" rel="stylesheet">
<script src="www/jquery-1.11.0/jquery.min.js"></script>
<script src="www/htmlwidgets-0.5/htmlwidgets.js"></script>
<link href="www/str_view-0.1.0/str_view.css" rel="stylesheet" />
<script src="www/str_view-binding-1.0.0.9000/str_view.js"></script>
<link href='http://fonts.googleapis.com/css?family=Inconsolata:400,700'
rel='stylesheet' type='text/css'>
</head>
<body>
<div class="container">
<div class="masthead">
<ul class="nav nav-pills pull-right">
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown">
Table of contents <b class="caret"></b>
</a>
<ul class="dropdown-menu pull-right" role="menu">
{% include package-nav.html %}
</ul>
</li>
</ul>
<h3 class="muted"><a href="/">R for data science</a> <small>by Garrett Grolemund and Hadley Wickham</small></h3>
<hr>
</div>
<div class="row">
<div class="col-xs-12 col-sm-3" id="nav">
<h4>Contents</h4>
<ul class="list-unstyled" id="toc"></ul>
<hr>
<p><a href="/contribute.html">How to contribute</a></p>
<p><a class="btn btn-primary" href="https://github.com/hadley/r4ds/edit/master/{{page.path}}">Edit this page</a></p>
</div>
<div id="content" class="col-xs-12 col-sm-8 pull-right">
{{ content }}
</div>
</div>
<div class="footer">
<hr>
<p>&copy; Garrett Grolemund &amp; Hadley Wickham. Powered by <a href="http://jekyllrb.com/">jekyll</a>,
<a href="http://yihui.name/knitr/">knitr</a>, and
<a href="http://johnmacfarlane.net/pandoc/">pandoc</a>. Source
available on <a href="https://github.com/hadley/r4ds/">github</a>.
</p>
</div>
</div> <!-- /container -->
<script src="//code.jquery.com/jquery.js"></script>
<script src="www/bootstrap.min.js"></script>
<script src="www/toc.js"></script>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-67989-17', 'auto');
ga('send', 'pageview');
</script>
</body>
</html>

View File

@ -1,9 +0,0 @@
---
layout: default
---
<h2>{{ page.title }}</h2>
<p class="meta">{{ page.date | date_to_string }}</p>
<div class="post">
{{ content }}
</div>

11
_output.yaml Normal file
View File

@ -0,0 +1,11 @@
bookdown::gitbook:
config:
toc:
collapse: section
before: |
<li><strong><a href="./">R for Data Science</a></strong></li>
edit:
link: https://github.com/hadley/r4ds/edit/master/%s
text: "Edit"
sharing: no
css: r4ds.css

View File

@ -1,24 +0,0 @@
#!/usr/bin/Rscript
library(rmarkdown)
library(bookdown)
library(methods)
args <- commandArgs(trailingOnly = TRUE)
path <- args[1]
if (!file.exists(path)) {
stop("Can't find path ", path, call. = FALSE)
}
if (file.access(path, 4) != 0) {
stop("Can't read path ", path, call. = FALSE)
}
html_path <- render(path, html_chapter(raw = TRUE, toc = "toc.rds"),
quiet = TRUE)
read_file <- function(path) {
size <- file.info(path)$size
readChar(path, size, useBytes = TRUE)
}
cat(read_file(html_path))

View File

@ -1,33 +0,0 @@
require 'tempfile'
module Jekyll
class RMarkdownConverter < Converter
safe :false
priority :high
def matches(ext)
ext =~ /^\.(rmd|rmarkdown)$/i
end
def output_ext(ext)
".html"
end
def convert(content)
f = File.new("temp.Rmd", "w")
f.write(content)
f.write("\n")
f.flush
# http://rubyquicktips.com/post/5862861056/execute-shell-commands
content = `_plugins/knit.r temp.Rmd`
if $?.exitstatus != 0
raise "Knitting failed"
end
content
# File.unlink f.path
end
end
end

View File

@ -1,7 +0,0 @@
set.seed(1014)
options(digits = 3)
knitr::opts_chunk$set(
comment = "#>",
collapse = TRUE
)

View File

@ -1,6 +1,3 @@
---
layout: default
title: Communicate your work
---
# Communicate your work
Reproducible, literate code is the data science equivalent of the Scientific Report (i.e, Intro, Methods and materials, Results, Discussion).

View File

@ -1,8 +1,3 @@
---
title: Contributing
layout: default
---
# Contributing
This book has been developed in the open, and it wouldn't be nearly as good

BIN
cover.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 269 KiB

After

Width:  |  Height:  |  Size: 202 KiB

View File

@ -1,20 +1,464 @@
---
layout: default
title: Data structures
---
# Data structures
Might be quite brief.
```{r, include = FALSE}
library(purrr)
library(dplyr)
```
Atomic vectors and lists. What is a data frame?
As you start to write more functions, and as you want your functions to work with more types of inputs, it's useful to have some grounding in the underlying data structures that R is built on. This chapter will dive deeper into the objects that you've already used, helping you better understand how things work.
`typeof()` vs. `class()` mostly in context of how date/times and factors are built on top of simpler structures.
The most important class of objects in R is the __vector__. Every vector has two key properties:
## Factors
1. Its type, whether it's logical, numeric, character, and so on. You
can determine the type of any R object with `typeof()`.
(Since won't get a chapter of their own)
2. Its length, which you can retrieve with `length()`.
Vectors are broken down into __atomic__ vectors, and __lists__. I call factors, dates, and date times __augmented vectors__ because they're built on top of atomic vectors. Data frames are also augmented vectors as they built on top of lists.
Note that R does not have "scalars". In R, a single number is a vector of length 1. The impacts of this are mostly on how functions work. Because there are no scalars, most built-in functions are vectorised, meaning that they will operate on a vector of numbers. That's why, for example, you can write `1:10 + 10:1`.
## Atomic vectors
There are four important types of atomic vector:
* logical
* integer
* double
* character
Collectively, integer and double vectors are known as numeric vectors. Most of the time the distinction between integers and doubles is not important in R, so we'll discuss them together.
(There are also two rarer atomic vectors: raw and complex. They're beyond the scope of this book because they are rarely needed to do data analysis)
### Logical
Logical vectors are the simplest type of atomic vector because they can take only three possible values: `FALSE`, `TRUE`, and `NA`. Logical vectors are usually constructed with comparison operators, as described in [comparisons].
In numeric contexts, `TRUE` is converted to `1`, `FALSE` converted to 0. That means the sum of a logical vector is the number of trues, and the mean of a logical vector is the proportion of trues.
```{r}
x <- sample(20, 100, replace = TRUE)
y <- x > 10
sum(y)
mean(y)
```
### Numeric
Numeric vectors encompasses both integers and doubles (real numbers). For large data, there is some small advantage to using the integer data type if you really have integers, but in most cases the differences are immaterial. In R, numbers are doubles by default. To make an integer, use a `L` after the number:
```{r}
typeof(1)
typeof(1L)
```
There are two cases where you need to be aware of the differences between doubles and integers. Firstly, never test for equality on a double. There can be very small differences that don't print out by default. These differences arise because a double is represented using a fixed number of (binary) digits. For example, what should you get if you square the square-root of two?
```{r}
x <- sqrt(2) ^ 2
x
```
It certainly looks like we get what we expect: `2`. But things are not exactly as they seem:
```{r}
x == 2
x - 2
```
The number we've computed is actually slightly different to 2. To avoid this sort of comparison difficulty, you can use the `near()` function from dplyr (available in 0.5).
```{r, eval = packageVersion("dplyr") >= "0.4.3.9000"}
dplyr::near(x, 2)
```
The other important thing to know about doubles is that they have three special values in addition to `NA`:
```{r}
c(-1, 0, 1) / 0
```
Like with missing values, you should avoid using `==` to check for these other special values. Instead use `is.finite()`, `is.infinite()`, and `is.nan()`:
| | 0 | Inf | NA | NaN |
|------------------|-----|-----|-----|-----|
| `is.finite()` | x | | | |
| `is.infinite()` | | x | | |
| `is.na()` | | | x | x |
| `is.nan()` | | | | x |
Note that `is.finite(x)` is not the same as `!is.infinite(x)`.
### Character
Each element of a character vector is a string.
```{r}
x <- c("abc", "def", "ghijklmnopqrs")
typeof(x)
```
You learned how to manipulate these vectors in [strings].
R uses a global string pool. This reduces the amount of memory strings take up because
```{r}
x <- "This is a reasonably long string."
pryr::object_size(x)
y <- rep(x, 1000)
pryr::object_size(y)
```
`y` doesn't take up 1,000x as much memory as `x`, because each element of `y` is just a pointer to that same string. A pointer is 8 bytes, so 1000 pointers to a 136 B string is about 8.13 kB.
### Missing values
There are four types of missing value, one for each type of atomic vector:
```{r}
NA # logical
NA_integer_ # integer
NA_real_ # double
NA_character_ # character
```
It is not usually necessary to know about these different types because in most cases `NA` is automatically converted to the type that you need. However, there are some functions that are strict about their inputs, and you'll need to give them an missing value of the correct type.
## Subsetting
Not sure where else this should be covered.
## Augmented vectors
There are three important types of vector that are built on top of atomic vectors: factors, dates, and date times. I call these augmented vectors, because they are atomic vectors with additional __attributes__. Attributes are a way of adding arbitrary additional metadata to a vector. Each attribute is a named vector. You can get and set individual attribute values with `attr()` or see them all at once with `attributes()`.
```{r}
x <- 1:10
attr(x, "greeting")
attr(x, "greeting") <- "Hi!"
attr(x, "farewell") <- "Bye!"
attributes(x)
```
There are three very important attributes that are used to implement fundamental parts of R:
* "names" are used to name the elements of a vector.
* "dims" make a vector behave like a matrix or array.
* "class" is used to implemenet the S3 object oriented system.
Class is particularly important because it changes what __generic functions__ do with the object. Generic functions are key to OO in R. Here's what a typical generic function looks like:
```{r}
as.Date
```
The call to "UseMethod" means that this is a generic function, and it will call a specific __method__, based on the class of the first argument. You can list all the methods for a generic with `methods()`:
```{r}
methods("as.Date")
```
And you can see the specific implementation of a method with `getS3method()`:
```{r}
getS3method("as.Date", "default")
getS3method("as.Date", "numeric")
```
The most important S3 generic is `print()`: it controls how the object is printed when you type its name on the console. Other important generics are the subsetting functions `[`, `[[`, and `$`.
A detailed discussion of S3 is beyond the scope of this book, but you can read more about it at <http://adv-r.had.co.nz/OO-essentials.html#s3>.
### Factors
Factors are designed to represent categorical data that can take a fixed set of possible values. Factors are built on top of integers, and have a levels attribute:
```{r}
x <- factor(c("ab", "cd", "ab"), levels = c("ab", "cd", "ef"))
typeof(x)
attributes(x)
```
Historically, factors were much easier to work with than characters so many functions in base R automatically convert characters to factors (controlled by the dread `stringsAsFactors` argument). To get more historical context, you might want to read [stringsAsFactors: An unauthorized biography](http://simplystatistics.org/2015/07/24/stringsasfactors-an-unauthorized-biography/) by Roger Peng or [stringsAsFactors = \<sigh\>](http://notstatschat.tumblr.com/post/124987394001/stringsasfactors-sigh) by Thomas Lumley. The motivation for factors is the modelling context. If you're going to fit a model to categorical data, you need to know in advance all the possible values. There's no way to make a prediction for "green" if all you've ever seen is "red", "blue", and "yellow"
The packages in this book keep characters as is, but you will need to deal with them if you are working with base R or many other packages. When you encounter a factor, you should first check to see if you can avoid creating it in the first. Often there will be `stringsAsFactors` argument that you can set to `FALSE`. Otherwise, you can apply `as.character()` to the column to explicitly turn back into a factor.
```{r}
x <- factor(letters[1:5])
is.factor(x)
as.factor(letters[1:5])
```
### Dates and date times
Dates in R are numeric vectors (sometimes integers, sometimes doubles) that represent the number of days since 1 January 1970.
```{r}
x <- as.Date("1971-01-01")
unclass(x)
typeof(x)
attributes(x)
```
Date times are numeric vectors (sometimes integers, sometimes doubles) that represent the number of seconds since 1 January 1970:
```{r}
x <- lubridate::ymd_hm("1970-01-01 01:00")
unclass(x)
typeof(x)
attributes(x)
```
The `tzone` is optional, and only controls the way the date is printed not what it means.
There is another type of datetimes called POSIXlt. These are built on top of named lists.
```{r}
y <- as.POSIXlt(x)
typeof(y)
attributes(y)
```
If you use the packages outlined in this book, you should never encounter a POSIXlt. They do crop up in base R, because they are used extract specific components of a date (like the year or month). However, lubridate provides helpers for you to do this instead. Otherwise POSIXct's are always easier to work with, so if you find you have a POSIXlt, you should always convert it to a POSIXct with `as.POSIXct()`.
## Recursive vectors (lists)
Lists are the data structure R uses for hierarchical objects. You can create a hierarchical structure with a list because unlike vectors, a list can contain other lists.
You create a list with `list()`:
```{r}
x <- list(1, 2, 3)
str(x)
x_named <- list(a = 1, b = 2, c = 3)
str(x_named)
```
Unlike atomic vectors, `lists()` can contain a mix of objects:
```{r}
y <- list("a", 1L, 1.5, TRUE)
str(y)
```
Lists can even contain other lists!
```{r}
z <- list(list(1, 2), list(3, 4))
str(z)
```
`str()` is very helpful when looking at lists because it focusses on the structure, not the contents.
### Visualising lists
To explain more complicated list manipulation functions, it's helpful to have a visual representation of lists. For example, take these three lists:
```{r}
x1 <- list(c(1, 2), c(3, 4))
x2 <- list(list(1, 2), list(3, 4))
x3 <- list(1, list(2, list(3)))
```
I draw them as follows:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-structure.png")
```
* Lists are rounded rectangles that contain their children.
* I draw each child a little darker than its parent to make it easier to see
the hierarchy.
* The orientation of the children (i.e. rows or columns) isn't important,
so I'll pick a row or column orientation to either save space or illustrate
an important property in the example.
### Subsetting
There are three ways to subset a list, which I'll illustrate with `a`:
```{r}
a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))
```
* `[` extracts a sub-list. The result will always be a list.
```{r}
str(a[1:2])
str(a[4])
```
Like subsetting vectors, you can use an integer vector to select by
position, or a character vector to select by name.
* `[[` extracts a single component from a list. It removes a level of
hierarchy from the list.
```{r}
str(y[[1]])
str(y[[4]])
```
* `$` is a shorthand for extracting named elements of a list. It works
similarly to `[[` except that you don't need to use quotes.
```{r}
a$a
a[["b"]]
```
Or visually:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-subsetting.png")
```
### Lists of condiments
It's easy to get confused between `[` and `[[`, but it's important to understand the difference. A few months ago I stayed at a hotel with a pretty interesting pepper shaker that I hope will help you remember these differences:
```{r, echo = FALSE, out.width = "25%"}
knitr::include_graphics("images/pepper.jpg")
```
If this pepper shaker is your list `x`, then, `x[1]` is a pepper shaker containing a single pepper packet:
```{r, echo = FALSE, out.width = "25%"}
knitr::include_graphics("images/pepper-1.jpg")
```
`x[2]` would look the same, but would contain the second packet. `x[1:2]` would be a pepper shaker containing two pepper packets.
`x[[1]]` is:
```{r, echo = FALSE, out.width = "25%"}
knitr::include_graphics("images/pepper-2.jpg")
```
If you wanted to get the content of the pepper package, you'd need `x[[1]][[1]]`:
```{r, echo = FALSE, out.width = "25%"}
knitr::include_graphics("images/pepper-3.jpg")
```
### Exercises
1. Draw the following lists as nested sets.
1. Generate the lists corresponding to these nested set diagrams.
1. What happens if you subset a data frame as if you're subsetting a list?
What are the key differences between a list and a data frame?
## Data frames
Data frames are augmented lists: they have class "data.frame", and `names` (column) and `row.names` attributes:
```{r}
df1 <- data.frame(x = 1:5, y = 5:1)
typeof(df1)
attributes(df1)
```
The difference between a data frame and a list is that all the elements of a data frame must be the same length. All functions that work with data frames enforce this constraint.
Generally, I recommend using `dplyr::data_frame()` instead of `data.frame`. It creates an object that "extends" the data frame. That means it has all the existing behaviour of a data frame:
```{r}
df2 <- dplyr::data_frame(x = 1:5, y = 5:1)
typeof(df2)
attributes(df2)
```
The additional `tbl_df` class makes the print method more informative (and only prints the first 10 rows, not the first 10,000), and makes the subsetting methods more strict:
```{r, error = TRUE}
df1
df2
df1$z
df2$z
```
There are a few other ways in `data_frame()` behaves differently to `data.frame()`
* `data.frame()` does a number of transformations to its inputs. For example,
unless you `stringsAsFactors = FALSE` it always converts character vectors
to factors. `data_frame()` does not conversion:
```{r}
data.frame(x = letters) %>% sapply(class)
data_frame(x = letters) %>% sapply(class)
```
* `data.frame()` automatically transforms names, `data_frame()` does not.
```{r}
data.frame(`crazy name` = 1) %>% names()
data_frame(`crazy name` = 1) %>% names()
```
* In `data_frame()` you can refer to variables that you just created:
```{r}
data_frame(x = 1:5, y = x ^ 2)
```
* It never uses row names. The whole point of tidy data is to store variables
in a consistent way. Row names are a variable stored in a unique way,
so I don't recommend using them.
* It only recycles vectors of length 1. Recycling vectors of greater lengths
is a frequent source of silent mistakes.
```{r, error = TRUE}
data.frame(x = 1:2, y = 1:4)
data_frame(x = 1:2, y = 1:4)
```
## Predicates
| | lgl | int | dbl | chr | list | null |
|------------------|-----|-----|-----|-----|------|------|
| `is_logical()` | x | | | | | |
| `is_integer()` | | x | | | | |
| `is_double()` | | | x | | | |
| `is_numeric()` | | x | x | | | |
| `is_character()` | | | | x | | |
| `is_atomic()` | x | x | x | x | | |
| `is_list()` | | | | | x | |
| `is_vector()` | x | x | x | x | x | |
| `is_null()` | | | | | | x |
Compared to the base R functions, they only inspect the type of the object, not its attributes. This means they tend to be less surprising:
```{r}
is.atomic(NULL)
is_atomic(NULL)
is.vector(factor("a"))
is_vector(factor("a"))
```
I recommend using these instead of the base functions.
Each predicate also comes with "scalar" and "bare" versions. The scalar version checks that the length is 1 and the bare version checks that the object is a bare vector with no S3 class.
```{r}
y <- factor(c("a", "b", "c"))
is_integer(y)
is_scalar_integer(y)
is_bare_integer(y)
```
### Exercises
1. Carefully read the documentation of `is.vector()`. What does it actually
test for?

View File

@ -1,7 +1,4 @@
---
layout: default
title: Databases
---
# Databases
### Two-table verbs

View File

@ -1,9 +1,3 @@
---
layout: default
title: Dates and times
---
# Dates and times
If you have trouble remembering these abbreviations, check out the [strptimer package](https://cran.r-project.org/web/packages/strptimer/vignettes/strptimer.html).

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

After

Width:  |  Height:  |  Size: 35 KiB

Binary file not shown.

File diff suppressed because it is too large Load Diff

138
hierarchy.Rmd Normal file
View File

@ -0,0 +1,138 @@
# Handling hierarchy {#hierarchy}
```{r setup, include=FALSE}
library(purrr)
```
<!--
## Warm ups
* What does this for loop do?
* How is a data frame like a list?
* What does `mean()` mean? What does `mean` mean?
* How do you get help about the $ function? How do you normally write
`[[`(mtcars, 1) ?
* Argument order
-->
The map functions apply a function to every element in a list. They are the most commonly used part of purrr, but not the only part. Since lists are often used to represent complex hierarchies, purrr also provides tools to work with hierarchy:
* You can extract deeply nested elements in a single call by supplying
a character vector to the map functions.
* You can remove a level of the hierarchy with the flatten functions.
* You can flip levels of the hierarchy with the transpose function.
## Extracting deeply nested elements
Some times you get data structures that are very deeply nested. A common source of such data is JSON from a web API. I've previously downloaded a list of GitHub issues related to this book and saved it as `issues.json`. Now I'm going to load it into a list with jsonlite. By default `fromJSON()` tries to be helpful and simplifies the structure a little for you. Here I'm going to show you how to do it with purrr, so I set `simplifyVector = FALSE`:
```{r}
# From https://api.github.com/repos/hadley/r4ds/issues
issues <- jsonlite::fromJSON("issues.json", simplifyVector = FALSE)
```
There are eight issues, and each issue is a nested list:
```{r}
length(issues)
str(issues[[1]])
```
To work with this sort of data, you typically want to turn it into a data frame by extracting the related vectors that you're most interested in:
```{r}
issues %>% map_int("id")
issues %>% map_lgl("locked")
issues %>% map_chr("state")
```
You can use the same technique to extract more deeply nested structure. For example, imagine you want to extract the name and id of the user. You could do that in two steps:
```{r}
users <- issues %>% map("user")
users %>% map_chr("login")
users %>% map_int("id")
```
But by supplying a character _vector_ to `map_*`, you can do it in one:
```{r}
issues %>% map_chr(c("user", "login"))
issues %>% map_int(c("user", "id"))
```
## Removing a level of hierarchy
As well as indexing deeply into hierarchy, it's sometimes useful to flatten it. That's the job of the flatten family of functions: `flatten()`, `flatten_lgl()`, `flatten_int()`, `flatten_dbl()`, and `flatten_chr()`. In the code below we take a list of lists of double vectors, then flatten it to a list of double vectors, then to a double vector.
```{r}
x <- list(list(a = 1, b = 2), list(c = 3, d = 4))
str(x)
y <- flatten(x)
str(y)
flatten_dbl(y)
```
Graphically, that sequence of operations looks like:
```{r, echo = FALSE}
knitr::include_graphics("diagrams/lists-flatten.png")
```
Whenever I get confused about a sequence of flattening operations, I'll often draw a diagram like this to help me understand what's going on.
Base R has `unlist()`, but I recommend avoiding it for the same reason I recommend avoiding `sapply()`: it always succeeds. Even if your data structure accidentally changes, `unlist()` will continue to work silently the wrong type of output. This tends to create problems that are frustrating to debug.
## Switching levels in the hierarchy {#transpose}
Other times the hierarchy feels "inside out". You can use `transpose()` to flip the first and second levels of a list:
```{r}
x <- list(
x = list(a = 1, b = 3, c = 5),
y = list(a = 2, b = 4, c = 6)
)
x %>% str()
x %>% transpose() %>% str()
```
Graphically, this looks like:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-transpose.png")
```
You'll see an example of this in the next section, as `transpose()` is particularly useful in conjunction with adverbs like `safely()` and `quietly()`.
It's called transpose by analogy to matrices. When you subset a transposed matrix, you switch indices: `x[i, j]` is the same as `t(x)[j, i]`. It's the same idea when transposing a list, but the subsetting looks a little different: `x[[i]][[j]]` is equivalent to `transpose(x)[[j]][[i]]`. Similarly, a transpose is its own inverse so `transpose(transpose(x))` is equal to `x`.
Transpose is also useful when working with JSON APIs. Many JSON APIs represent data frames in a row-based format, rather than R's column-based format. `transpose()` makes it easy to switch between the two:
```{r}
df <- dplyr::data_frame(x = 1:3, y = c("a", "b", "c"))
df %>% transpose() %>% str()
```
## Turning lists into data frames
* Have a deeply nested list with missing pieces
* Need a tidy data frame so you can visualise, transform, model etc.
* What do you do?
* By hand with purrr, talk about `fromJSON` and `tidyJSON`
* tidyjson
### Exercises
1. Challenge: read all the csv files in a directory. Which ones failed
and why?
```{r, eval = FALSE}
files <- dir("data", pattern = "\\.csv$")
files %>%
set_names(., basename(.)) %>%
map_df(safely(readr::read_csv), .id = "filename") %>%
```

View File

@ -1,8 +1,3 @@
---
layout: default
title: Data import
---
# Data import
```{r, include = FALSE}
@ -27,14 +22,14 @@ There are many ways to read flat files into R. If you've be using R for a while,
* These functions are typically much faster (~10x) than the base equivalents.
Long run running jobs also have a progress bar, so you can see what's
happening. (If you're looking for raw speed, try `data.table::fread()`,
happening. (If you're looking for raw speed, try `data.table::fread()`,
it's slightly less flexible than readr, but can be twice as fast.)
* They have more flexible parsers: they can read in dates, times, currencies,
percentages, and more.
* They fail to do some annoying things like converting character vectors to
factors, munging the column headers to make sure they're valid R
percentages, and more.
* They fail to do some annoying things like converting character vectors to
factors, munging the column headers to make sure they're valid R
variable names, and using row names.
* They return objects with class `tbl_df`. As you saw in the dplyr chapter,
@ -45,24 +40,24 @@ There are many ways to read flat files into R. If you've be using R for a while,
sometimes need to supply a few more arguments when using them the first
time, but they'll definitely work on other peoples computers. The base R
functions take a number of settings from system defaults, which means that
code that works on your computer might not work on someone elses.
code that works on your computer might not work on someone else's.
Make sure you have the readr package (`install.packages("readr")`).
Most of readr's functions are concerned with turning flat files into data frames:
* `read_csv()` read comma delimited files, `read_csv2()` reads semi-colon
* `read_csv()` reads comma delimited files, `read_csv2()` reads semi-colon
separated files (common in countries where `,` is used as the decimal place),
`read_tsv()` reads tab delimited files, and `read_delim()` reads in files
with a user supplied delimiter.
* `read_fwf()` reads fixed width files. You can specify fields either by their
widths with `fwf_widths()` or theirs position with `fwf_positions()`.
widths with `fwf_widths()` or their position with `fwf_positions()`.
`read_table()` reads a common variation of fixed width files where columns
are separated by white space.
* `read_log()` reads Apache style logs. (But also check out
[webreadr](https://github.com/Ironholds/webreadr) which is built on top
[webreadr](https://github.com/Ironholds/webreadr) which is built on top
of `read_log()`, but provides many more helpful tools.)
readr also provides a number of functions for reading files off disk into simpler data structures:
@ -73,29 +68,29 @@ readr also provides a number of functions for reading files off disk into simple
These might be useful for other programming tasks.
As well as reading data frame disk, readr also provides tools for working with data frames and character vectors in R:
As well as reading data from disk, readr also provides tools for working with data frames and character vectors in R:
* `type_convert()` applies the same parsing heuristics to the character columns
in a data frame. You can override its choices using `col_types`.
For the rest of this chapter we'll focus on `read_csv()`. If you understand how to use this function, it will be straightforward to your knowledge to all the other functions in readr.
### Basics
The first two arguments of `read_csv()` are:
* `file`: path (or URL) to the file you want to load. Readr can automatically
* `file`: path (or URL) to the file you want to load. Readr can automatically
decompress files ending in `.zip`, `.gz`, `.bz2`, and `.xz`. This can also
be a literal csv file, which is useful for experimenting and creating
reproducible examples.
* `col_names`: column names. There are three options:
* `TRUE` (the default), which reads column names from the first row
* `TRUE` (the default), which reads column names from the first row
of the file
* `FALSE` number columns sequentially from `X1` to `Xn`.
* `FALSE` numbers columns sequentially from `X1` to `Xn`.
* A character vector, used as column names. If these don't match up
with the columns in the data, you'll get a warning message.
@ -109,7 +104,7 @@ EXAMPLE
Typically, you'll see a lot of warnings if readr has guessed the column type incorrectly. This most often occurs when the first 1000 rows are different to the rest of the data. Perhaps there are a lot of missing data there, or maybe your data is mostly numeric but a few rows have characters. Fortunately, it's easy to fix these problems using the `col_type` argument.
(Note that if you have a very large file, you might want to set `n_max` to 10,000 or 100,000. That will speed up iteration while you're finding common problems)
(Note that if you have a very large file, you might want to set `n_max` to 10,000 or 100,000. That will speed up iterations while you're finding common problems)
Specifying the `col_type` looks like this:
@ -122,24 +117,24 @@ read_csv("mypath.csv", col_types = col(
You can use the following types of columns
* `col_integer()` (i) and `col_double()` (d) specify integer and doubles.
* `col_integer()` (i) and `col_double()` (d) specify integer and doubles.
`col_logical()` (l) parses TRUE, T, FALSE and F into a logical vector.
`col_character()` (c) leaves strings as is.
`col_character()` (c) leaves strings as is.
* `col_number()` (n) is a more flexible parsed for numbers embedded in other
strings. It will look for the first number in a string, ignoring non-numeric
prefixes and suffixes. It will also ignoring the grouping mark specified by
* `col_number()` (n) is a more flexible parsed for numbers embedded in other
strings. It will look for the first number in a string, ignoring non-numeric
prefixes and suffixes. It will also ignore the grouping mark specified by
the locale (see below for more details).
* `col_factor()` (f) allows you to load data directly into a factor if you know
* `col_factor()` (f) allows you to load data directly into a factor if you know
what the levels are.
* `col_skip()` (_, -) completely ignores a column.
* `col_date()` (D), `col_datetime()` (T) and `col_time()` (t) parse into dates,
* `col_date()` (D), `col_datetime()` (T) and `col_time()` (t) parse into dates,
date times, and times as described below.
You might have noticed that each column parser has a one letter abbreviation, which you can instead of the full function call (assuming you're happy with the default arguments):
You might have noticed that each column parser has a one letter abbreviation, which you can use instead of the full function call (assuming you're happy with the default arguments):
```{r, eval = FALSE}
read_csv("mypath.csv", col_types = cols(
@ -196,14 +191,14 @@ If these defaults don't work for your data you can supply your own date time for
* Seconds: `%S` (integer seconds), `%OS` (partial seconds).
* Time zone: `%Z` (as name, e.g. `America/Chicago`), `%z` (as offset from UTC,
e.g. `+0800`). If you're American, note that "EST" is a Canadian time zone
that does not have daylight savings time. It is \emph{not} Eastern Standard
* Time zone: `%Z` (as name, e.g. `America/Chicago`), `%z` (as offset from UTC,
e.g. `+0800`). If you're American, note that "EST" is a Canadian time zone
that does not have daylight savings time. It is \emph{not} Eastern Standard
Time!
* AM/PM indicator: `%p`.
* Non-digits: `%.` skips one non-digit charcter, `%*` skips any number of
* Non-digits: `%.` skips one non-digit character, `%*` skips any number of
non-digits.
The best way to figure out the correct string is to create a few examples in a character vector, and test with one of the parsing functions. For example:
@ -236,11 +231,11 @@ The settings you are most like to need to change are:
locale("fr")
locale("fr", asciify = TRUE)
```
* The character encoding used in the file. If you don't know the encoding
you can use `guess_encoding()`. It's not perfect, but if you have a decent
sample of text, it's likely to be able to figure it out.
Readr converts all strings into UTF-8 as this is safest to work with across
platforms. (It's also what every stringr operation does.)
@ -257,146 +252,3 @@ The settings you are most like to need to change are:
## Binary files
Needs to discuss how data types in different languages are converted to R. Similarly for missing values.
## Tibble diffs
`data_frame()` is a nice way to create data frames. It encapsulates best practices for data frames:
* It never changes an input's type (i.e., no more `stringsAsFactors = FALSE`!).
```{r}
data.frame(x = letters) %>% sapply(class)
data_frame(x = letters) %>% sapply(class)
```
This makes it easier to use with list-columns:
```{r}
data_frame(x = 1:3, y = list(1:5, 1:10, 1:20))
```
List-columns are most commonly created by `do()`, but they can be useful to
create by hand.
* It never adjusts the names of variables:
```{r}
data.frame(`crazy name` = 1) %>% names()
data_frame(`crazy name` = 1) %>% names()
```
* It evaluates its arguments lazily and sequentially:
```{r}
data_frame(x = 1:5, y = x ^ 2)
```
* It adds the `tbl_df()` class to the output so that if you accidentally print a large
data frame you only get the first few rows.
```{r}
data_frame(x = 1:5) %>% class()
```
* It changes the behaviour of `[` to always return the same type of object:
subsetting using `[` always returns a `tbl_df()` object; subsetting using
`[[` always returns a column.
You should be aware of one case where subsetting a `tbl_df()` object
will produce a different result than a `data.frame()` object:
```{r}
df <- data.frame(a = 1:2, b = 1:2)
str(df[, "a"])
tbldf <- tbl_df(df)
str(tbldf[, "a"])
```
* It never uses `row.names()`. The whole point of tidy data is to
store variables in a consistent way. So it never stores a variable as
special attribute.
* It only recycles vectors of length 1. This is because recycling vectors of greater lengths
is a frequent source of bugs.
### Coercion
To complement `data_frame()`, dplyr provides `as_data_frame()` to coerce lists into data frames. It does two things:
* It checks that the input list is valid for a data frame, i.e. that each element
is named, is a 1d atomic vector or list, and all elements have the same
length.
* It sets the class and attributes of the list to make it behave like a data frame.
This modification does not require a deep copy of the input list, so it's
very fast.
This is much simpler than `as.data.frame()`. It's hard to explain precisely what `as.data.frame()` does, but it's similar to `do.call(cbind, lapply(x, data.frame))` - i.e. it coerces each component to a data frame and then `cbinds()` them all together. Consequently `as_data_frame()` is much faster than `as.data.frame()`:
```{r}
l2 <- replicate(26, sample(100), simplify = FALSE)
names(l2) <- letters
microbenchmark::microbenchmark(
as_data_frame(l2),
as.data.frame(l2)
)
```
The speed of `as.data.frame()` is not usually a bottleneck when used interactively, but can be a problem when combining thousands of messy inputs into one tidy data frame.
### tbl_dfs vs data.frames
There are three key differences between tbl_dfs and data.frames:
* When you print a tbl_df, it only shows the first ten rows and all the
columns that fit on one screen. It also prints an abbreviated description
of the column type:
```{r}
data_frame(x = 1:1000)
```
You can control the default appearance with options:
* `options(dplyr.print_max = n, dplyr.print_min = m)`: if more than `n`
rows print `m` rows. Use `options(dplyr.print_max = Inf)` to always
show all rows.
* `options(dply.width = Inf)` will always print all columns, regardless
of the width of the screen.
* When you subset a tbl\_df with `[`, it always returns another tbl\_df.
Contrast this with a data frame: sometimes `[` returns a data frame and
sometimes it just returns a single column:
```{r}
df1 <- data.frame(x = 1:3, y = 3:1)
class(df1[, 1:2])
class(df1[, 1])
df2 <- data_frame(x = 1:3, y = 3:1)
class(df2[, 1:2])
class(df2[, 1])
```
To extract a single column it's use `[[` or `$`:
```{r}
class(df2[[1]])
class(df2$x)
```
* When you extract a variable with `$`, tbl\_dfs never do partial
matching. They'll throw an error if the column doesn't exist:
```{r, error = TRUE}
df <- data.frame(abc = 1)
df$a
df2 <- data_frame(abc = 1)
df2$a
```

View File

@ -1,23 +1,14 @@
---
layout: default
title: Welcome
knit: "bookdown::render_book"
title: "R for Data Science"
output:
bookdown::html_chapters:
lib_dir: "book_assets"
- bookdown::gitbook
---
# R for Data Science
# Welcome
This is the book site for __"R for data science"__. This book will teach you how to do data science with R: You'll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it. In this book, you will find a practicum of skills for data science. Just as a chemist learns how to clean test tubes and stock a lab, you'll learn how to clean data and draw plots---and many other things besides. These are the skills that allow data science to happen, and here you will find the best practices for doing each of these things with R. You'll learn how to use the grammar of graphics, literate programming, and reproducible research to save time. You'll also learn how to manage cognitive resources to facilitate discoveries when wrangling, visualizing, and exploring data. (__R for Data Science__ was formally called __Data Science with R__ in __Hands-On Programming with R__)
This is the book site for __"R for data science"__. This book will teach you how to do data science with R: You'll learn how to get your data into R, get it into the most useful structure, transform it, visualise it and model it. In this book, you will find a practicum of skills for data science. Just as a chemist learns how to clean test tubes and stock a lab, you'll learn how to clean data and draw plots---and many other things besides. These are the skills that allow data science to happen, and here you will find the best practices for doing each of these things with R. You'll learn how to use the grammar of graphics, literate programming, and reproducible research to save time. You'll also learn how to manage cognitive resources to facilitate discoveries when wrangling, visualizing, and exploring data. (__R for Data Science__ was formerly called __Data Science with R__ in __Hands-On Programming with R__)
To be published by O'Reilly in July 2016.
<img src="cover.png" width="250" height="328" alt="Cover image" />
## Table of contents {#toc}
<ul class="toc">
{% include package-nav.html %}
</ul>

View File

@ -1,16 +1,10 @@
---
title: Introduction
layout: default
---
# Introduction
```{r setup-intro, include = FALSE}
source("common.R")
install.packages <- function(...) invisible()
```
Data science is an exciting discipline that allows you to turn raw data into understanding, insight, and knowledge. The goal of "R for Data Science" is to introduce you to the most important in R tools that you need to do data science. After reading this book, you'll have the tools to tackle a wide variety of data science challenges, using the best parts of R.
Data science is an exciting discipline that allows you to turn raw data into understanding, insight, and knowledge. The goal of "R for Data Science" is to introduce you to the most important R tools that you need to do data science. After reading this book, you'll have the tools to tackle a wide variety of data science challenges, using the best parts of R.
## What you will learn
@ -30,24 +24,24 @@ There are two main engines of knowledge generation: visualisation and modelling.
__Visualisation__ is a fundamentally human activity. A good visualisation will show you things that you did not expect, or raise new questions of the data. A good visualisation might also hint that you're asking the wrong question and you need to refine your thinking. In short, visualisations can surprise you. However, visualisations don't scale particularly well.
__Models__ are the complementary tools to visualisation. Models are a fundamentally mathematical or computation tool, so they generally scale well. Even when they don't, it's usually cheaper to buy more computers than it is to buy more brains. But every model makes assumptions, and by its very nature a model can not question its own assumptions. That means a model can not fundamentally surprise you.
__Models__ are the complementary tools to visualisation. Models are a fundamentally mathematical or computational tool, so they generally scale well. Even when they don't, it's usually cheaper to buy more computers than it is to buy more brains. But every model makes assumptions, and by its very nature a model can not question its own assumptions. That means a model cannot fundamentally surprise you.
The last step of data science is __communication__, an absolutely critical part of any data analysis project. It doesn't matter how well models and visualisation have led you to understand the data, unless you can commmunicate your results to other people.
There's one important toolset that's not shown in the diagram: programming. Programming is a cross-cutting tool that you use in every part of the project. You don't need to be an expert programmer to be a data scientist, but learning more about programming pays off. Becoming a better programmer will allow you automate common tasks, and solve new problems with greater ease.
There's one important toolset that's not shown in the diagram: programming. Programming is a cross-cutting tool that you use in every part of the project. You don't need to be an expert programmer to be a data scientist, but learning more about programming pays off. Becoming a better programmer will allow you to automate common tasks, and solve new problems with greater ease.
You'll use these tools in every data science project, but for most projects they're not enough. There's a rough 80-20 rule at play: you can probably tackle 80% of every project using the tools that we'll teach you, but you'll need more to tackle the remaining 20%. Throughout this book we'll point you to resources where you can learn more.
## How you will learn
The above description of the tools of data science was organised roughly around the order in which you use them in analysis (although of course you'll iterate through them multiple times). In our experience, however, this is not the best way to learn them:
The above description of the tools of data science is organised roughly around the order in which you use them in analysis (although of course you'll iterate through them multiple times). In our experience, however, this is not the best way to learn them:
* Starting with data ingest and tidying is sub-optimal because 80% of the time
it's routine and boring, and the other 20% of the time it's horrendously
frustrating. Instead, we'll start with visualisation and transformation on
data that's already been imported and tidied. That way, when you ingest
and tidy your own data, you'll be able to keep your motivation high because
you know the pain is worth it because of what you can accomplish once its
you know the pain is worth it because of what you can accomplish once it's
done.
* Some topics are best explained with other tools. For example, we believe that
@ -64,15 +58,15 @@ Within each chapter, we try and stick to a similar pattern: start with some moti
## What you won't learn
There are some important topics that this book doesn't cover. We believe it's important to stay ruthlessly focussed on the essentials so you can get up and running as quickly as possible. That means this book can't cover every important topic.
There are some important topics that this book doesn't cover. We believe it's important to stay ruthlessly focused on the essentials so you can get up and running as quickly as possible. That means this book can't cover every important topic.
### Big data
This book proudly focusses on small, in-memory datasets. This is the right place to start because you can't tackle big data unless you have experience with small data. The tools you learn in this book will easily handle hundreds of megabytes of data, and with a little care you can typically use them to work with 1-2 Gb of data. If you're routinely working larger data (10-100 Gb, say), you should learn more about [data.table](https://github.com/Rdatatable/data.table). We don't teach data.table here because it has a very concise interface that is harder to learn because it offers fewer linguistic cues. But if you're working with large data, the performance payoff is worth a little extra effort to learn it.
This book proudly focuses on small, in-memory datasets. This is the right place to start because you can't tackle big data unless you have experience with small data. The tools you learn in this book will easily handle hundreds of megabytes of data, and with a little care you can typically use them to work with 1-2 Gb of data. If you're routinely working with larger data (10-100 Gb, say), you should learn more about [data.table](https://github.com/Rdatatable/data.table). We don't teach data.table here because it has a very concise interface that is harder to learn because it offers fewer linguistic cues. But if you're working with large data, the performance payoff is worth a little extra effort to learn it.
Many big data problems are often small data problems in disguise. Often your complete dataset is big, but the data needed to answer a specific question is small. It's often possible to find a subset, subsample, or summary that fits in memory and still allows you to answer the question that you're interested in. The challenge here is finding the right small data, which often requires a lot of iteration. We'll touch on this idea in [transform](#transform).
Another class of big data problem consists of many small data problems. Each individual problem might fit in memory, but you have millions of them. For example, you might want to fit a model to each person in your dataset. That would be trivial if you had just 10 or 100 people, but instead you have a million. Fortunately each problem is independent (sometimes called embarassingly parallel), so you just need a system (like hadoop) that allows you to send different datasets to different computers for processing. Once you've figured out to how answer the question for a single subset using the tools described in this book, you can use packages like SparkR, rhipe, and ddr to solve it for the complete dataset.
Another class of big data problem consists of many small data problems. Each individual problem might fit in memory, but you have millions of them. For example, you might want to fit a model to each person in your dataset. That would be trivial if you had just 10 or 100 people, but instead you have a million. Fortunately each problem is independent (sometimes called embarassingly parallel), so you just need a system (like Hadoop) that allows you to send different datasets to different computers for processing. Once you've figured out how to answer the question for a single subset using the tools described in this book, you can use packages like SparkR, rhipe, and ddr to solve it for the complete dataset.
### Python
@ -86,7 +80,7 @@ This book focuses exclusively on structured data sets: collections of values tha
### Formal Statistics and Machine Learning
This book focusses on practical tools for understanding your data: visualization, modelling, and transformation. You can develop your understanding further by learning probability theory, statistical hypothesis testing, and machine learning methods; but we won't teach you those things here. There are many books that cover these topics, but few that integrate the other parts of the data science process. When you are ready, you can and should read books devoted to each of these topics. We recommend *Statistical Modeling: A Fresh Approach* by Danny Kaplan; *An Introduction to Statistical Learning* by James, Witten, Hastie, and Tibshirani; and *Applied Predictive Modeling* by Kuhn and Johnson.
This book focuses on practical tools for understanding your data: visualization, modelling, and transformation. You can develop your understanding further by learning probability theory, statistical hypothesis testing, and machine learning methods; but we won't teach you those things here. There are many books that cover these topics, but few that integrate the other parts of the data science process. When you are ready, you can and should read books devoted to each of these topics. We recommend *Statistical Modeling: A Fresh Approach* by Danny Kaplan; *An Introduction to Statistical Learning* by James, Witten, Hastie, and Tibshirani; and *Applied Predictive Modeling* by Kuhn and Johnson.
## Prerequisites
@ -94,7 +88,7 @@ We've made few assumptions about what you already know in order to get the most
To run the code in this book, you will need to install both R and the RStudio IDE, an application that makes R easier to use. Both are open source, free and easy to install:
1. Download R and install R, <https://www.r-project.org/alt-home/>.
1. Download and install R, <https://www.r-project.org/alt-home/>.
1. Download and install RStudio, <http://www.rstudio.com/download>.
1. Install needed packages (see below).
@ -110,7 +104,7 @@ You run R code in the __console__ pane. Textual output appears inline, and graph
There are three keyboard shortcuts for the RStudio IDE that we strongly encourage that you learn because they'll save you so much time:
* Cmd + Enter: sends current line (or current selection) from the editor to
* Cmd + Enter: sends the current line (or current selection) from the editor to
the console and runs it. (Ctrl + Enter on a PC)
* Tab: suggest possible completions for the text you've typed.
@ -126,7 +120,7 @@ We strongly recommend making two changes to the default RStudio options:
knitr::include_graphics("screenshots/rstudio-workspace.png")
```
This ensures that every time you restart RStudio you get a completely clean slate. This is good pratice because it encourages you to capture all important interactions in your code. There's nothing worse than discovering three months after the fact that you've only stored the results of important calculation in your workspace, not the calculation itself in your code. During a project, it's good practice to regularly restart R either using the menu Session | Restart R or the keyboard shortcut Cmd + Shift + F10.
This ensures that every time you restart RStudio you get a completely clean slate. This is good pratice because it encourages you to capture all important interactions in your code. There's nothing worse than discovering three months after the fact that you've only stored the results of an important calculation in your workspace, not the calculation itself in your code. During a project, it's good practice to regularly restart R either using the menu Session | Restart R or the keyboard shortcut Cmd + Shift + F10.
### R packages
@ -134,7 +128,7 @@ You'll also need to install some R packages. An R _package_ is a collection of f
```{r}
pkgs <- c(
"bookdown", "broom", "dplyr", "ggplot2", "jpeg", "jsonlite",
"broom", "dplyr", "ggplot2", "jpeg", "jsonlite",
"knitr", "microbenchmark", "png", "pryr", "purrr", "readr", "stringr",
"tidyr"
)
@ -155,15 +149,15 @@ You will need to reload the package every time you start a new R session.
* Google. Always a great place to start! Adding "R" to a query is usually
enough to filter it down. If you ever hit an error message that you
don't know how to handle, it is a great idea to google it.
don't know how to handle, it is a great idea to Google it.
If your operating system defaults to another language, you can use
`Sys.setenv(LANGUAGE = "en")` to tell R to use english. That's likely to
`Sys.setenv(LANGUAGE = "en")` to tell R to use English. That's likely to
get you to common solutions more quickly.
* StackOverflow. Be sure to read and use [How to make a reproducible example](http://adv-r.had.co.nz/Reproducibility.html)([reprex](https://github.com/jennybc/reprex)) before posting. Unfortunately the R stackoverflow community is not always the friendliest.
* Stack Overflow. Be sure to read and use [How to make a reproducible example](http://adv-r.had.co.nz/Reproducibility.html)([reprex](https://github.com/jennybc/reprex)) before posting. Unfortunately the R Stack Overflow community is not always the friendliest.
* Twitter. #rstats hashtag is very welcoming. Great way to keep up with
* Twitter. The #rstats hashtag is very welcoming and is a great way to keep up with
what's happening in the community.
## Acknowledgements

947
iteration.Rmd Normal file
View File

@ -0,0 +1,947 @@
# Iteration
```{r setup, include=FALSE}
library(purrr)
```
In [functions], we talked about how important it is to reduce duplication in your code. Reducing code duplication has three main benefits:
1. It's easier to see the intent of your code, because your eyes are
drawn to what is different, not what is the same.
1. It's easier to respond to changes in requirements. As your needs
change, you only need to make changes in one place, rather than
remembering to change every place that you copied-and-pasted the
code.
1. You're likely to have fewer bugs because each line of code is
used in more places.
One part of reducing duplication is writing functions. Functions allow you to identify repeated patterns of code and extract them out into indepdent pieces that you can reuse and easily update as code changes. Iteration helps you when you need to do the same thing to multiple inputs: repeating the same operation on different columns, or on different datasets. (Generally, you won't need to use explicit iteration to deal with different subsets of your data: in most cases the implicit iteration in dplyr will take care of that problem for you.)
In this chapter you'll learn about two important iteration paradigms: imperative programming and functional programming, and the machinary each provides. On the imperative side you have things like for loops and while loops, which are a great place to start because they make iteration very explicit, so it's obvious what's happening. However, for loops are quite verbose, and include quite a bit of book-keeping code, that is duplicated for every for loop. Functional programming (FP) offers tools to extract out this duplicated code, so each common for loop pattern gets its own function. Once you master the vocabulary of FP, you can solve many common iteration problems with less code, more ease, and fewer errors.
Some people will tell you to avoid for loops because they are slow. They're wrong! (Well at least they're rather out of date, for loops haven't been slow for many years). The chief benefits of using FP functions like `lapply()` or `purrr::map()` is that they are more expressive and make code both easier to write and easier to read.
In later chapters you'll learn how to apply these iterating ideas when modelling. You can often use multiple simple models to help understand a complex dataset, or you might have multiple models because you're bootstrapping or cross-validating. The techniques you'll learn in this chapter will be invaluable.
The goal of using purrr functions instead of for loops is to allow you break common list manipulation challenges into independent pieces:
1. How can you solve the problem for a single element of the list? Once
you've solved that problem, purrr takes care of generalising your
solution to every element in the list.
1. If you're solving a complex problem, how can you break it down into
bite sized pieces that allow you to advance one small step towards a
solution? With purrr, you get lots of small pieces that you can
compose together with the pipe.
This structure makes it easier to solve new problems. It also makes it easier to understand your solutions to old problems when you re-read your old code.
## For loops
Imagine we have this simple data frame:
```{r}
df <- data.frame(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
```
We want to compute the median of each column. You _could_ do with copy-and-paste:
```{r}
median(df$a)
median(df$b)
median(df$c)
median(df$d)
```
But that breaks our rule of thumb: never copy and paste more than twice. Instead, we could use a for loop:
```{r}
output <- vector("double", ncol(df)) # 1. output
for (i in seq_along(df)) { # 2. sequence
output[[i]] <- median(df[[i]]) # 3. body
}
output
```
Every for loop has three components:
1. The __output__: `output <- vector("integer", length(x))`.
Before you start the loop, you must always allocate sufficient space
for the output. This is very important for efficiency: if you grow
the for loop at each iteration using `c()` (for example), your for loop
will be very slow.
A general way of creating an empty vector of given length is the `vector()`
function. It has two arguments: the type of the vector ("logical",
"integer", "double", "character", etc) and the length of the vector.
1. The __sequence__: `i in seq_along(df)`. This determines what to loop over:
each run of the for loop will assign `i` to a different value from
`seq_along(df)`. It's useful to think of `i` as a pronoun, like "it".
You might not have seen `seq_along()` before. It's a safe version of the
familiar `1:length(l)`, with an important difference: if you have a
zero-length vector, `seq_along()` does the right thing:
```{r}
y <- vector("double", 0)
seq_along(y)
1:length(y)
```
You probably won't create a zero-length vector deliberately, but
it's easy to create them accidentally. If you use `1:length(x)` instead
of `seq_along(x)`, you're likely to get a confusing error message.
1. The __body__: `output[[i]] <- median(df[[i]])`. This is the code that does
the work. It's run repeatedly, each time with a different value for `i`.
The first iteration will run `output[[1]] <- median(df[[1]])`,
the second will run `output[[2]] <- median(df[[2]])`, and so on.
That's all there is to the for loop! Now is a good time to practice creating some basic (and not so basic) for loops using the exercises below. Then we'll move on some variations of the for loop that help you solve other problems that will crop up in practice.
### Exercises
1. Write for loops to:
1. Compute the mean of every column in the `mtcars`.
1. Determine the type of each column in `nycflights13::flights`.
1. Compute the number of unique values in each column of `iris`.
1. Generate 10 random normals for each of $mu = -10$, $0$, $10$, and $100$.
Think about output, sequence, and body, __before__ you start writing
the loop.
1. Eliminate the for loop in each of the following examples by taking
advantage of a built-in function that works with vectors:
```{r}
out <- ""
for (x in letters) {
out <- paste0(out, x)
}
x <- sample(100)
sd <- 0
for (i in seq_along(x)) {
sd <- sd + (x[i] - mean(x)) ^ 2
}
sd <- sqrt(sd) / (length(x) - 1)
x <- runif(100)
out <- vector("numeric", length(x))
out[1] <- x[1]
for (i in 2:length(x)) {
out[i] <- out[i - 1] + x[i]
}
```
1. Combine your function writing and for loop skills.
1. Convert the song "99 bottles of beer on the wall" to a function.
Generalise to any number of any vessel containing any liquid on
any surface.
1. Convert the nursery rhyme "ten in the bed" to a function. Generalise
it to any number of people in any sleeping structure.
1. It's common to see for loops that don't preallocate the output and instead
increase the length of a vector at each step:
```{r, eval = FALSE}
output <- vector("integer", 0)
for (i in seq_along(x)) {
output <- c(output, lengths(x[[i]]))
}
output
```
How does this affect performance?
## For loop variations
Once you have the basic for loop under your belt, there are some variations on a theme that you should be aware of. These variations are important regardless of how you do iteration, so don't forget about them once you've master the FP techniques you'll learn about in the next section.
There are four variations on the basic theme of the for loop:
1. Modifying an existing object, instead of creating a new object.
1. Looping over names or values, instead of indices.
1. Handling outputs of unknown length.
1. Handling sequences of unknown length.
### Modifying an existing object
Sometimes you want to use a for loop to modify an existing object. For example, remember our challenge from [functions]. We wanted to rescale every column in a data frame:
```{r}
df <- data.frame(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
rescale01 <- function(x) {
rng <- range(x, na.rm = TRUE)
(x - rng[1]) / (rng[2] - rng[1])
}
df$a <- rescale01(df$a)
df$b <- rescale01(df$b)
df$c <- rescale01(df$c)
df$d <- rescale01(df$d)
```
To solve this with a for loop we use the same three components:
1. Output: we already have the output - it's the same as the input!
1. Sequence: we can think about a data frame as a list of columns, so
we can iterate over each column with `seq_along(df)`.
1. Body: apply `rescale01()`.
This gives us:
```{r}
for (i in seq_along(df)) {
df[[i]] <- rescale01(df[[i]])
}
```
Typically you'll be modifying a list or data frame with this sort of loop, so remember to use `[[`, not `[`. You might have spotted that I used `[[` in all my for loops: I think it's safer to use the subsetting operator that will work in all circumstances (and it makes it clear than I'm working with a single value each time).
### Looping patterns
There are three basic ways to loop over a vector. So far I've shown you the most general: looping over the numeric indices with `for (i in seq_along(xs))`, and extracting the value with `x[[i]]`. There are two other forms:
1. Loop over the elements: `for (x in xs)`. This is most useful if you only
care about side-effects, liking plotting or saving a file, because it's
difficult to save the output efficiently.
1. Loop over the names: `for (nm in names(xs))`. This gives you name, which
you can use to access the value with `x[[nm]]`. This is useful if you want
to use the name in a plot title or a file name. If you're creating
named output, make sure to name the results vector like so:
```{r, eval = FALSE}
results <- vector("list", length(x))
names(results) <- names(x)
```
Iteration over the numeric indices is the most general form, because given the position you can extract both the name and the value:
```{r, eval = FALSE}
for (i in seq_along(x)) {
name <- names(x)[[i]]
value <- x[[i]]
}
```
### Unknown output length
Sometimes you might know now how long the output will be. For example, imagine you want to simulate some random vectors of random lengths. You might be tempted to solve this problem by progressively growing the vector:
```{r}
means <- c(0, 1, 2)
output <- double()
for (i in seq_along(means)) {
n <- sample(100, 1)
output <- c(output, rnorm(n, means[[i]]))
}
str(output)
```
But this type of is not very efficient because in each iteration, R has to copy all the data from the previous iterations. In technical terms you get "quadratic" ($O(n^2)$) behaviour which means that a loop with three times as many elements would take nine times ($3^2$) as long to run.
A better solution to save the results in a list, and then combine into a single vector after the loop is done:
```{r}
out <- vector("list", length(means))
for (i in seq_along(means)) {
n <- sample(100, 1)
out[[i]] <- rnorm(n, means[[i]])
}
str(out)
str(unlist(out))
```
Here I've used `unlist()` to flatten a list of vectors into a single vector. You'll learn about other options in [Removing a level of hierarchy].
This pattern occurs in other places too:
1. You might be generating a long string. Instead of `paste()`ing together
each iteration with the previous, save the output in a character vector and
then combine that vector into a single string with
`paste(output, collapse = "")`.
1. You might be generating a big data frame. Instead of sequentially
`rbind()`ing in each iteration, save the output in a list, then use
`dplyr::bind_rows(output)` to combine the output into a single
data frame.
Watch out for this pattern. Whenever you see it, switch to a more complex results object, and then combine in one step at the end.
### Unknown sequence length
Sometimes you don't even know how long the input sequence should run for. This is common when doing simulations. For example, you might want to loop until you get three heads in a row. You can't do that sort of iteration with the for loop. Instead, you can use a while loop.
A while loop is simpler than for loop because it only has two components, a condition and a body:
```{r, eval = FALSE}
while (condition) {
# body
}
```
A while loop is more general than a for loop, because you can rewrite any for loop as a while loop, but you can't rewrite every while loop as a for loop:
```{r, eval = FALSE}
for (i in seq_along(x)) {
# body
}
# Equivalent to
i <- 1
while (i < length(x)) {
# body
i <- i + 1
}
```
Here's how we could use a while loop to find how many tries it takes to get three heads in a row:
```{r}
flip <- function() sample(c("T", "H"), 1)
flips <- 1
nheads <- 0
while (nheads < 3) {
if (flip() == "H") {
nheads <- nheads + 1
} else {
nheads <- 0
}
flips <- flips + 1
}
flips
```
I mention while loops briefly, because I hardly ever use them. They're most often used for simulation, which is outside the scope of this book. However, it is good to know they exist, if you encounter a problem where the number of iterations is not known in advance.
### Exercises
1. Imagine you have a directory full of csv files that you want to read in.
You have their paths in a vector,
`files <- dir("data/", pattern = "\\.csv$", full.paths = TRUE)`, and now
want to read each one with `read_csv()`. Write the for loop that will
load them into a single data frame.
1. Write a function that prints the mean of each numeric column in a data
frame, along with its name. For example, `show_mean(iris)` would print:
```{r, eval = FALSE}
show_mean(iris)
#> Sepal.Length: 5.84
#> Sepal.Width: 3.06
#> Petal.Length: 3.76
#> Petal.Width: 1.20
```
(Extra challenge: what function did I use to make sure that the numbers
lined up nicely, even though the variables had different names?)
1. What does this code do? How does it work?
```{r, eval = FALSE}
trans <- list(
disp = function(x) x * 0.0163871,
am = function(x) {
factor(x, levels = c("auto", "manual"))
}
)
for (var in names(trans)) {
mtcars[[var]] <- trans[[var]](mtcars[[var]])
}
```
## For loops vs functionals
For loops are not as important in R as they are in other languages because R is a functional programming language. This means that it's possible to wrap up for loops in a function, and call that function instead of using the for loop directly.
To see why this is important, consider (again) this simple data frame:
```{r}
df <- data.frame(
a = rnorm(10),
b = rnorm(10),
c = rnorm(10),
d = rnorm(10)
)
```
Imagine you want to compute the mean of every column. You could do that with a for loop:
```{r}
output <- numeric(length(df))
for (i in seq_along(df)) {
output[[i]] <- mean(df[[i]])
}
output
```
You realise that you're going to want to compute the means of every column pretty frequently, so you extract it out into a function:
```{r}
col_mean <- function(df) {
output <- numeric(length(df))
for (i in seq_along(df)) {
output[i] <- mean(df[[i]])
}
output
}
```
But then you think it'd also be helpful to be able to compute the median, and the standard deviation, so you copy and paste your `col_mean()` function and replace the `mean()` with `median()` and `sd()`:
```{r}
col_median <- function(df) {
output <- numeric(length(df))
for (i in seq_along(df)) {
output[i] <- median(df[[i]])
}
output
}
col_sd <- function(df) {
output <- numeric(length(df))
for (i in seq_along(df)) {
output[i] <- sd(df[[i]])
}
output
}
```
Uh oh! You've copied-and-pasted this code twice, so it's time to think about how to generalise it. Notice that most of code is for-loop boilerplate and it's hard to see the one thing (`mean()`, `median()`, `sd()`) that is different between the functions.
What would you do if you saw a set of functions like this:
```{r}
f1 <- function(x) abs(x - mean(x)) ^ 1
f2 <- function(x) abs(x - mean(x)) ^ 2
f3 <- function(x) abs(x - mean(x)) ^ 3
```
Hopefully, you'd notice that there's a lot of duplication, and extract it out into an additional argument:
```{r}
f <- function(x, i) abs(x - mean(x)) ^ i
```
You've reduced the chance of bugs (because you now have 1/3 less code), and made it easy to generalise to new situations.
We can do exactly the same thing with `col_mean()`, `col_median()` and `col_sd()`. We can add an argument that supplies the function to apply to each column:
```{r}
col_summary <- function(df, fun) {
out <- vector("numeric", length(df))
for (i in seq_along(df)) {
out[i] <- fun(df[[i]])
}
out
}
col_summary(df, median)
col_summary(df, mean)
```
The idea of passing a function to another function is extremely powerful idea, and it's one of the reasons that R is called a functional programming language. It might take you a while to wrap your head around the idea, but it's worth the investment. In the rest of the chapter, you'll learn about and use the __purrr__ package, which provides functions that eliminate the need for many common for loops. The apply family of functions in base R (`apply()`, `lapply()`, `tapply()`, etc) solve a similar problem, but purrr is more consistent and thus is easier to learn.
### Exercises
1. Read the documentation for `apply()`. In the 2d case, what two for loops
does it generalise?
1. Adapt `col_summary()` so that it only applies to numeric columns
You might want to start with an `is_numeric()` function that returns
a logical vector that has a TRUE corresponding to each numeric column.
## The map functions
The pattern of looping over a vector, doing something to each element and saving the results is so common that the purrr package provides a family of functions to do it for you. There is one function for each type of vector:
* `map()` returns a list.
* `map_lgl()` returns a logical vector.
* `map_int()` returns a integer vector.
* `map_dbl()` returns a double vector.
* `map_chr()` returns a character vector.
Each function takes a vector as input, applies a function to each piece, and then returns a new vector that's the same length (and has the same names) as the input. The type of the vector is determined by the suffix to the map function.
Once you master these functions, you'll find it takes much less time to solve iteration problems. But you should never feel bad about using a for loop instead of a map function. The map functions are a step up a tower of abstraction, and it can take a long time to get your head around how they work. The important thing is that you solve the problem that you're working on, not write the most concise and elegant code (although that's definitely something you want to strive towards!).
We can use these functions to perform the same computations as the last for loop. Those summary functions returned doubles, so we need to use `map_dbl()`:
```{r}
map_dbl(df, mean)
map_dbl(df, median)
map_dbl(df, sd)
```
Compared to using a for loop, focus is on the operation being performed (i.e. `mean()`, `median()`, `sd()`), not the book-keeping required to loop over every element and store the output. This is even more apparent if we use the pipe:
```{r}
df %>% map_dbl(mean)
df %>% map_dbl(median)
df %>% map_dbl(sd)
```
There are a few differences between `map_*()` and `col_summary()`:
* All purrr functions are implemented in C. This makes them a little faster
at the expense of readability.
* The second argument, `.f`, the function to apply, can be a formula, a
character vector, or an integer vector. You'll learn about those handy
shortcuts in the next section.
* `map_*()` uses ... ([dot dot dot]) to pass along additional arguments
to `.f` each time it's called:
```{r}
map_dbl(df, mean, trim = 0.5)
```
* The map functions also preserve names:
```{r}
z <- list(x = 1:3, y = 4:5)
map_int(z, length)
```
### Shortcuts
There are a few shortcuts that you can use with `.f` in order to save a little typing. Imagine you want to fit a linear model to each group in a dataset. The following toy example splits the up the `mtcars` dataset in to three pieces (one for each value of cylinder) and fits the same linear model to each piece:
```{r}
models <- mtcars %>%
split(.$cyl) %>%
map(function(df) lm(mpg ~ wt, data = df))
```
The syntax for creating an anonymous function in R is quite verbose so purrr provides a convenient shortcut: a one-sided formula.
```{r}
models <- mtcars %>%
split(.$cyl) %>%
map(~lm(mpg ~ wt, data = .))
```
Here I've used `.` as a pronoun: it refers to the current list element (in the same way that `i` referred to the current index in the for loop).
When you're looking at many models, you might want to extract a summary statistic like the $R^2$. To do that we need to first run `summary()` and then extract the component called `r.squared`. We could do that using the shorthand for anonymous functions:
```{r}
models %>%
map(summary) %>%
map_dbl(~.$r.squared)
```
But extracting named components is a common operation, so purrr provides an even shorter shortcut: you can use a string.
```{r}
models %>%
map(summary) %>%
map_dbl("r.squared")
```
You can also use a numeric vector to select elements by position:
```{r}
x <- list(list(1, 2, 3), list(4, 5, 6), list(7, 8, 9))
x %>% map_dbl(2)
```
### Base R
If you're familiar with the apply family of functions in base R, you might have noticed some similarities with the purrr functions:
* `lapply()` is basically identical to `map()`. There's no advantage to using
`map()` over `lapply()` except that it's consistent with all the other
functions in purrr, and you can use the shortcuts for `.f`.
* Base `sapply()` is a wrapper around `lapply()` that automatically
simplifies the output. This is useful for interactive work but is
problematic in a function because you never know what sort of output
you'll get:
```{r}
x1 <- list(
c(0.27, 0.37, 0.57, 0.91, 0.20),
c(0.90, 0.94, 0.66, 0.63, 0.06),
c(0.21, 0.18, 0.69, 0.38, 0.77)
)
x2 <- list(
c(0.50, 0.72, 0.99, 0.38, 0.78),
c(0.93, 0.21, 0.65, 0.13, 0.27),
c(0.39, 0.01, 0.38, 0.87, 0.34)
)
threshold <- function(x, cutoff = 0.8) x[x > cutoff]
x1 %>% sapply(threshold) %>% str()
x2 %>% sapply(threshold) %>% str()
```
* `vapply()` is a safe alternative to `sapply()` because you supply an
additional argument that defines the type. The only problem with
`vapply()` is that it's a lot of typing:
`vapply(df, is.numeric, logical(1))` is equivalent to
`map_lgl(df, is.numeric)`. One of advantage of `vapply()` over purrr's map
functions is that it can also produce matrices - the map functions only
ever produce vectors.
I focus on purrr functions here because they have more consistent names and arguments, helpful shortcuts, and in a future release will provide easy parallelism and progress bars.
### Exercises
1. How can you create a single vector that for each column in a data frame
indicates whether or not it's a factor?
1. What happens when you use the map functions on vectors that aren't lists?
What does `map(1:5, runif)` do? Why?
1. What does `map(-2:2, rnorm, n = 5)` do? Why?
What does `map_dbl(-2:2, rnorm, n = 5)` do? Why?
1. Rewrite `map(x, function(df) lm(mpg ~ wt, data = df))` to eliminate the
anonymous function.
## Dealing with failure
When you use the map functions to repeat many operations, the chances are much higher that one of those operations will fail. When this happens, you'll get an error message, and no output. This is annoying: why does one failure prevent you from accessing all the other successes? How do you ensure that one bad apple doesn't ruin the whole barrel?
In this section you'll learn how to deal this situation with a new function: `safely()`. `safely()` is an adverb: it takes a function (a verb) and returns a modified version. In this case, the modified function will never throw an error. Instead, it always returns a list with two elements:
1. `result` is the original result. If there was an error, this will be `NULL`.
1. `error` is an error object. If the operation was successful this will be
`NULL`.
(You might be familiar with the `try()` function in base R. It's similar, but because it sometimes returns the original result and it sometimes returns an error object it's more difficult to work with.)
Let's illustrate this with a simple example: `log()`:
```{r}
safe_log <- safely(log)
str(safe_log(10))
str(safe_log("a"))
```
When the function succeeds the `result` element contains the result and the `error` element is `NULL`. When the function fails, the `result` element is `NULL` and the `error` element contains an error object.
`safely()` is designed to work with map:
```{r}
x <- list(1, 10, "a")
y <- x %>% map(safely(log))
str(y)
```
This would be easier to work with if we had two lists: one of all the errors and one of all the output. That's easy to get with `purrr::transpose()` (you'll learn more about `transpose()` in [transpose])
```{r}
y <- y %>% transpose()
str(y)
```
It's up to you how to deal with the errors, but typically you'll either look at the values of `x` where `y` is an error, or work with the values of `y` that are ok:
```{r}
is_ok <- y$error %>% map_lgl(is_null)
x[!is_ok]
y$result[is_ok] %>% flatten_dbl()
```
Purrr provides two other useful adverbs:
* Like `safely()`, `possibly()` always succeeds. It's simpler than `safely()`,
because you give it a default value to return when there is an error.
```{r}
x <- list(1, 10, "a")
x %>% map_dbl(possibly(log, NA_real_))
```
* `quietly()` performs a similar role to `safely()`, but instead of capturing
errors, it captures printed output, messages, and warnings:
```{r}
x <- list(1, -1)
x %>% map(quietly(log)) %>% str()
```
### Exercises
1. Given the following list, extract all the error messages with the smallest
amount of code possible:
```{r}
x <- list(1, 10, "a")
y <- x %>% map(safely(log))
```
1.
## Mapping over multiple arguments
So far we've mapped along a single list. But often you have multiple related lists that you need iterate along in parallel. That's the job of the `map2()` and `pmap()` functions.
For example, imagine you want to simulate some random normals with different means. You know how to do that with `map()`:
```{r}
mu <- list(5, 10, -3)
mu %>% map(rnorm, n = 5) %>% str()
```
What if you also want to vary the standard deviation? One way to do that would be to iterate over the indices and index into vectors of means and sds:
```{r}
sigma <- list(1, 5, 10)
seq_along(mu) %>% map(~ rnorm(5, mu[[.]], sigma[[.]])) %>% str()
```
However, that someone obfuscates the intent of the code. Instead we could use `map2()` which works with iterates over two vectors in parallel:
```{r}
map2(mu, sigma, rnorm, n = 5) %>% str()
```
`map2()` generates this series of function calls:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-map2.png")
```
Note that the arguments that vary for each call come before the function name, and arguments that are the same for every function call come afterwards.
Like `map()`, `map2()` is just a wrapper around a for loop:
```{r}
map2 <- function(x, y, f, ...) {
out <- vector("list", length(x))
for (i in seq_along(x)) {
out[[i]] <- f(x[[i]], y[[i]], ...)
}
out
}
```
You could also imagine `map3()`, `map4()`, `map5()`, `map6()` etc, but that would get tedious quickly. Instead, purrr provides `pmap()` which takes a list of arguments. You might use that if you wanted to vary the mean, standard deviation, and number of samples:
```{r}
n <- list(1, 3, 5)
args1 <- list(n, mu, sigma)
args1 %>% pmap(rnorm) %>% str()
```
That looks like:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-pmap-unnamed.png")
```
If you don't name the elements of list, `pmap()` will use positional matching when calling the function. That's a little fragile, and makes the code harder to read, so it's better to name the arguments:
```{r, eval = FALSE}
args2 <- list(mean = mu, sd = sigma, n = n)
args2 %>% pmap(rnorm) %>% str()
```
That generates longer, but safer, calls:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-pmap-named.png")
```
Since the arguments are all the same length, it makes sense to store them in a data frame:
```{r}
params <- dplyr::data_frame(mean = mu, sd = sigma, n = n)
params$result <- params %>% pmap(rnorm)
params
```
As soon as your code gets complicated, I think a data frame is a good approach because it ensures that each column has a name and is the same length as all the other columns.
We'll come back to this idea in [hierarchy], and again when we explore the intersection of dplyr, purrr, and model fitting.
### Invoking different functions
There's one more step up in complexity - as well as varying the arguments to the function you might also vary the function itself:
```{r}
f <- c("runif", "rnorm", "rpois")
param <- list(
list(min = -1, max = 1),
list(sd = 5),
list(lambda = 10)
)
```
To handle this case, you can use `invoke_map()`:
```{r}
invoke_map(f, param, n = 5) %>% str()
```
```{r, echo = FALSE}
knitr::include_graphics("diagrams/lists-invoke.png")
```
The first argument is a list of functions or character vector of function names. The second argument is a list of lists giving the arguments that vary for each function. The subsequent arguments are passed on to every function.
You can use `tibble::frame_data()` to make creating these matching pairs a little easier:
```{r, eval = FALSE}
# Needs dev version of dplyr
sim <- tibble::frame_data(
~f, ~params,
"runif", list(min = -1, max = -1),
"rnorm", list(sd = 5),
"rpois", list(lambda = 10)
)
sim$f %>% invoke_map(sim$params, n = 10) %>% str()
```
### Exercises
1.
## Walk {#walk}
Walk is an alternative to map that you use when you want to call a function for its side effects, rather than for its return value. You typically do this because you want to render output to the screen or save files to disk - the important thing is the action, not the return value. Here's a very simple example:
```{r}
x <- list(1, "a", 3)
x %>%
walk(print)
```
`walk()` is generally not that useful compared to `walk2()` or `pwalk()`. For example, if you had a list of plots and a vector of file names, you could use `pwalk()` to save each file to the corresponding location on disk:
```{r, eval = FALSE}
library(ggplot2)
plots <- mtcars %>%
split(.$cyl) %>%
map(~ggplot(., aes(mpg, wt)) + geom_point())
paths <- paste0(names(plots), ".pdf")
pwalk(list(paths, plots), ggsave, path = tempdir())
```
`walk()`, `walk2()` and `pwalk()` all invisibly return `.x`, the first argument. This makes them suitable for use in the middle of pipelines.
## Other patterns of for loops
Purrr provides a number of other functions that abstract over other types of for loops. You'll use them less frequently than the map functions, but they're useful to have in your back pocket. The goal here is to briefly illustrate each function so hopefully it will come to mind if you see a similar problem in the future. Then you can go look up the documentation for more details.
### Predicate functions
A number of functions work with __predicates__ functions that return either a single `TRUE` or `FALSE`.
`keep()` and `discard()` keep elements of the input where the predicate is `TRUE` or `FALSE` respectively:
```{r}
iris %>% keep(is.factor) %>% str()
iris %>% discard(is.factor) %>% str()
```
`some()` and `every()` determine if the predicate is true for any or for all of
the elements.
```{r}
x <- list(1:5, letters, list(10))
x %>% some(is_character)
x %>% every(is_vector)
```
`detect()` finds the first element where the predicate is true; `detect_index()` returns its position.
```{r}
x <- sample(10)
x
x %>% detect(~ . > 5)
x %>% detect_index(~ . > 5)
```
`head_while()` and `tail_while()` take elements from the start or end of a vector while a predicate is true:
```{r}
head_while(x, ~ . > 5)
tail_while(x, ~ . > 5)
```
### Reduce and accumulate
Sometimes you have a complex list that you want to reduce to a simple list by repeatedly applying a function that reduces two inputs to a single input. This useful if you want to apply a two-table dplyr verb to multiple tables. For example, you might have a list of data frames, and you want to reduce to a single data frame by joining the elements together
```{r}
dfs <- list(
age = tibble::data_frame(name = "John", age = 30),
sex = tibble::data_frame(name = c("John", "Mary"), sex = c("M", "F")),
trt = tibble::data_frame(name = "Mary", treatment = "A")
)
dfs %>% reduce(dplyr::full_join)
```
The reduce function takes a "binary" function (i.e. a function with two primary inputs), and applies it repeatedly to a list until there is only a single element left.
Accumulate is similar but it keeps all the interim results. You could use it to implement a cumulative sum:
```{r}
x <- sample(10)
x
x %>% accumulate(`+`)
```
### Exercises
1. Implement your own version of `every()` using a for loop. Compare it with
`purrr::every()`. What does purrr's version do that your version doesn't?
1. Create an enhanced `col_sum()` that applies a summary function to every
numeric column in a data frame.
1. A possible base R equivalent of `col_sum()` is:
```{r}
col_sum3 <- function(df, f) {
is_num <- sapply(df, is.numeric)
df_num <- df[, is_num]
sapply(df_num, f)
}
```
But it has a number of bugs as illustrated with the following inputs:
```{r, eval = FALSE}
df <- data.frame(z = c("a", "b", "c"), x = 1:3, y = 3:1)
# OK
col_sum3(df, mean)
# Has problems: don't always return numeric vector
col_sum3(df[1:2], mean)
col_sum3(df[1], mean)
col_sum3(df[0], mean)
```
What causes the bugs?

871
lists.Rmd
View File

@ -1,871 +0,0 @@
---
layout: default
title: Lists
---
# Lists
```{r setup-lists, include=FALSE}
library(purrr)
source("common.R")
```
In this chapter, you'll learn how to handle lists, the data structure R uses for complex, hierarchical objects. You've already familiar with vectors, R's data structure for 1d objects. Lists extend these ideas to model objects that are like trees. You can create a hierarchical structure with a list because unlike vectors, a list can contain other lists.
If you've worked with list-like objects before, you're probably familiar with the for loop. I'll talk a little bit about for loops here, but the focus will be functions from the __purrr__ package. purrr makes it easier to work with lists by eliminating common for loop boilerplate so you can focus on the specifics. The apply family of functions in base R (`apply()`, `lapply()`, `tapply()`, etc) solve a similar problem, but purrr is more consistent and easier to learn.
The goal of using purrr functions instead of for loops is to allow you break common list manipulation challenges into independent pieces:
1. How can you solve the problem for a single element of the list? Once
you've solved that problem, purrr takes care of generalising your
solution to every element in the list.
1. If you're solving a complex problem, how can you break it down into
bite sized pieces that allow you to advance one small step towards a
solution? With purrr, you get lots of small pieces that you can
combose together with the pipe.
This structure makes it easier to solve new problems. It also makes it easier to understand your solutions to old problems when you re-read your old code.
In later chapters you'll learn how to apply these ideas when modelling. You can often use multiple simple models to help understand a complex dataset, or you might have multiple models because you're bootstrapping or cross-validating. The techniques you learn in this chapter will be invaluable.
<!--
## Warm ups
* What does this for loop do?
* How is a data frame like a list?
* What does `mean()` mean? What does `mean` mean?
* How do you get help about the $ function? How do you normally write
`[[`(mtcars, 1) ?
* Argument order
-->
## List basics
You create a list with `list()`:
```{r}
x <- list(1, 2, 3)
str(x)
x_named <- list(a = 1, b = 2, c = 3)
str(x_named)
```
Unlike atomic vectors, `lists()` can contain a mix of objects:
```{r}
y <- list("a", 1L, 1.5, TRUE)
str(y)
```
Lists can even contain other lists!
```{r}
z <- list(list(1, 2), list(3, 4))
str(z)
```
`str()` is very helpful when looking at lists because it focusses on the structure, not the contents.
### Visualising lists
To explain more complicated list manipulation functions, it's helpful to have a visual representation of lists. For example, take these three lists:
```{r}
x1 <- list(c(1, 2), c(3, 4))
x2 <- list(list(1, 2), list(3, 4))
x3 <- list(1, list(2, list(3)))
```
I draw them as follows:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-structure.png")
```
* Lists are rounded rectangles that contain their children.
* I draw each child a little darker than its parent to make it easier to see
the hierarchy.
* The orientation of the children (i.e. rows or columns) isn't important,
so I pick a row or column orientation to either save space or illustrate
an important property in the example.
### Subsetting
There are three ways to subset a list, which I'll illustrate with `a`:
```{r}
a <- list(a = 1:3, b = "a string", c = pi, d = list(-1, -5))
```
* `[` extracts a sub-list. The result will always be a list.
```{r}
str(a[1:2])
str(a[4])
```
Like subsetting vectors, you can use an integer vector to select by
position, or a character vector to select by name.
* `[[` extracts a single component from a list. It removes a level of
hierarchy from the list.
```{r}
str(y[[1]])
str(y[[4]])
```
* `$` is a shorthand for extracting named elements of a list. It works
similarly to `[[` except that you don't need to use quotes.
```{r}
a$a
a[["b"]]
```
Or visually:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-subsetting.png")
```
### Lists of condiments
It's easy to get confused between `[` and `[[`, but it's important to understand the difference. A few months ago I stayed at a hotel with a pretty interesting pepper shaker that I hope will help remember these differences:
```{r, echo = FALSE, out.width = "25%"}
knitr::include_graphics("images/pepper.jpg")
```
If this pepper shaker is your list `x`, then, `x[1]` is a pepper shaker containing a single pepper packet:
```{r, echo = FALSE, out.width = "25%"}
knitr::include_graphics("images/pepper-1.jpg")
```
`x[2]` would look the same, but would contain the second packet. `x[1:2]` would be a pepper shaker containing two pepper packets.
`x[[1]]` is:
```{r, echo = FALSE, out.width = "25%"}
knitr::include_graphics("images/pepper-2.jpg")
```
If you wanted to get the content of the pepper package, you'd need `x[[1]][[1]]`:
```{r, echo = FALSE, out.width = "25%"}
knitr::include_graphics("images/pepper-3.jpg")
```
### Exercises
1. Draw the following lists as nested sets.
1. Generate the lists corresponding to these nested set diagrams.
1. What happens if you subset a data frame as if you're subsetting a list?
What are the key differences between a list and a data frame?
## For loops
To illustrate for loops, we'll start by creating a stereotypical list: an eight element list where each element contains a random vector of random length. (You'll learn about `rerun()` later.)
```{r}
x <- rerun(8, runif(sample(5, 1)))
str(x)
```
Imagine we want to compute the length of each element in this list. One way to do that is with a for loop:
```{r}
results <- vector("integer", length(x))
for (i in seq_along(x)) {
results[i] <- length(x[[i]])
}
results
```
There are three parts to a for loop:
1. The __results__: `results <- vector("integer", length(x))`.
This creates an integer vector the same length as the input. It's important
to enough space for all the results up front, otherwise you have to grow the
results vector at each iteration, which is very slow for large loops.
1. The __sequence__: `i in seq_along(x)`. This determines what to loop over:
each run of the for loop will assign `i` to a different value from
`seq_along(x)`, shorthand for `1:length(x)`. It's useful to think of `i`
as a pronoun.
1. The __body__: `results[i] <- length(x[[i]])`. This code is run repeatedly,
each time with a different value in `i`. The first iteration will run
`results[1] <- length(x[[1]])`, the second `results[2] <- length(x[[2]])`,
and so on.
This loop used a function you might not be familiar with: `seq_along()`. This is a safe version of the more familiar `1:length(l)`. There's one important difference in behaviour. If you have a zero-length vector, `seq_along()` does the right thing:
```{r}
y <- numeric(0)
seq_along(y)
1:length(y)
```
Figuring out the length of the elements of a list is a common operation, so it makes sense to turn it into a function so we can reuse it again and again:
```{r}
compute_length <- function(x) {
results <- vector("numeric", length(x))
for (i in seq_along(x)) {
results[i] <- length(x[[i]])
}
results
}
compute_length(x)
```
(In fact base R has this function already: it's called `lengths()`.)
Now imagine we want to compute the `mean()` of each element. How would our function change? What if we wanted to compute the `median()`? You could create variations of `compute_lengths()` that looked like this:
```{r}
compute_mean <- function(x) {
results <- vector("numeric", length(x))
for (i in seq_along(x)) {
results[i] <- mean(x[[i]])
}
results
}
compute_mean(x)
compute_median <- function(x) {
results <- vector("numeric", length(x))
for (i in seq_along(x)) {
results[i] <- median(x[[i]])
}
results
}
compute_median(x)
```
But this is only two of the many functions we might want to apply to every element of a list, and there's already lot of duplication. Most of the code is for-loop boilerplate and it's hard to see the one function (`length()`, `mean()`, or `median()`) that's actually important.
What would you do if you saw a set of functions like this:
```{r}
f1 <- function(x) abs(x - mean(x)) ^ 1
f2 <- function(x) abs(x - mean(x)) ^ 2
f3 <- function(x) abs(x - mean(x)) ^ 3
```
Hopefully, you'd notice that there's a lot of duplication, and extract it out into an additional argument:
```{r}
f <- function(x, i) abs(x - mean(x)) ^ i
```
You've reduce the chance of bugs (because you now have 1/3 less code), and made it easy to generalise to new situations. We can do exactly the same thing with `compute_length()`, `compute_median()` and `compute_mean()`:
```{r}
compute_summary <- function(x, f) {
results <- vector("numeric", length(x))
for (i in seq_along(x)) {
results[i] <- f(x[[i]])
}
results
}
compute_summary(x, mean)
```
Instead of hardcoding the summary function, we allow it to vary, by adding an additional argument that is a function. It can take a while to wrap your head around this, but it's very powerful technique. This is one of the reasons that R is known as a "functional" programming language.
### Exercises
1. Read the documentation for `apply()`. In the 2d case, what two for loops
does it generalise?
1. It's common to see for loops that don't preallocate the output and instead
increase the length of a vector at each step:
```{r}
results <- vector("integer", 0)
for (i in seq_along(x)) {
results <- c(results, lengths(x[[i]]))
}
results
```
How does this affect performance?
## The map functions
This pattern of looping over a list and doing something to each element is so common that the purrr package provides a family of functions to do it for you. Each function always returns the same type of output so there are six variations based on what sort of result you want:
* `map()` returns a list.
* `map_lgl()` returns a logical vector.
* `map_int()` returns a integer vector.
* `map_dbl()` returns a double vector.
* `map_chr()` returns a character vector.
* `map_df()` returns a data frame.
* `walk()` returns nothing. Walk is a little different to the others because
it's called exclusively for its side effects, so it's described in more detail
later in [walk](#walk).
Each functions takes a list as input, applies a function to each piece, and then returns a new vector that's the same length as the input. The type of the vector is determine by the specific map function. Usually you want to use the most specific avaiable; using `map()` only as a fallback when there is no specialised equivalent available.
We can use these functions to perform the same computations as the previous for loops:
```{r}
map_int(x, length)
map_dbl(x, mean)
map_dbl(x, median)
```
Compared to using a for loop, focus is on the operation being performed (i.e. `length()`, `mean()`, or `median()`), not the book-keeping required to loop over every element and store the results.
There are a few differences between `map_*()` and `compute_summary()`:
* All purrr functions are implemented in C. This means you can't easily
understand their code, but it makes them a little faster.
* The second argument, `.f`, the function to apply, can be a formula, a
character vector, or an integer vector. You'll learn about those handy
shortcuts in the next section.
* Any arguments after `.f` will be passed on to it each time its called:
```{r}
map_dbl(x, mean, trim = 0.5)
```
* The map functions also preserve names:
```{r}
z <- list(x = 1:3, y = 4:5)
map_int(z, length)
```
### Shortcuts
There are a few shortcuts that you can use with `.f` in order to save a little typing. Imagine you want to fit a linear model to each individual in a dataset. The following toy example splits the up the `mtcars` dataset in to three pieces (only for each value of cylinder) and fits the same linear model to each piece:
```{r}
models <- mtcars %>%
split(.$cyl) %>%
map(function(df) lm(mpg ~ wt, data = df))
```
The syntax for creating an anonymous function in R is quite verbose so purrr provides a convenient shortcut: a one-sided formula.
```{r}
models <- mtcars %>%
split(.$cyl) %>%
map(~lm(mpg ~ wt, data = .))
```
Here I've used `.` as a pronoun: it refers to the "current" list element (in the same way that `i` referred to the number in the for loop). You can also use `.x` and `.y` to refer to up to two arguments. If you want to create an function with more than two arguments, do it the regular way!
When you're looking at many models, you might want to extract a summary statistic like the $R^2$. To do that we need to first run `summary()` and then extract the component called `r.squared`. We could do that using the shorthand for anonymous funtions:
```{r}
models %>%
map(summary) %>%
map_dbl(~.$r.squared)
```
But extracting named components is a common operation, so purrr provides an even shorter shortcut: you can use a string.
```{r}
models %>%
map(summary) %>%
map_dbl("r.squared")
```
You can also use a numeric vector to select elements by position:
```{r}
x <- list(list(1, 2, 3), list(4, 5, 6), list(7, 8, 9))
x %>% map_dbl(2)
```
### Base R
If you're familiar with the apply family of functions in base R, you might have noticed some similarities with the purrr functions:
* `lapply()` is basically identical to `map()`. There's no advantage to using
`map()` over `lapply()` except that it's consistent with all the other
functions in purrr.
* The base `sapply()` is a wrapper around `lapply()` that automatically tries
to simplify the results. This is useful for interactive work but is
problematic in a function because you never know what sort of output
you'll get:
```{r}
x1 <- list(
c(0.27, 0.37, 0.57, 0.91, 0.20),
c(0.90, 0.94, 0.66, 0.63, 0.06),
c(0.21, 0.18, 0.69, 0.38, 0.77)
)
x2 <- list(
c(0.50, 0.72, 0.99, 0.38, 0.78),
c(0.93, 0.21, 0.65, 0.13, 0.27),
c(0.39, 0.01, 0.38, 0.87, 0.34)
)
threshhold <- function(x, cutoff = 0.8) x[x > cutoff]
str(sapply(x1, threshhold))
str(sapply(x2, threshhold))
```
* `vapply()` is a safe alternative to `sapply()` because you supply an additional
argument that defines the type. The only problem with `vapply()` is that
it's a lot of typing: `vapply(df, is.numeric, logical(1))` is equivalent to
`map_lgl(df, is.numeric)`.
One of advantage of `vapply()` over the map functions is that it can also
produce matrices - the map functions only ever produce vectors.
* `map_df(x, f)` is effectively the same as `do.call("rbind", lapply(x, f))`
but under the hood is much more efficient.
### Exercises
1. How can you determine which columns in a data frame are factors?
(Hint: data frames are lists.)
1. What happens when you use the map functions on vectors that aren't lists?
What does `map(1:5, runif)` do? Why?
1. What does `map(-2:2, rnorm, n = 5)` do. Why?
1. Rewrite `map(x, function(df) lm(mpg ~ wt, data = df))` to eliminate the
anonymous function.
## Handling hierarchy {#hierarchy}
The map functions apply a function to every element in a list. They are the most commonly used part of purrr, but not the only part. Since lists are often used to represent complex hierarchies, purrr also provides tools to work with hierarchy:
* You can extract deeply nested elements in a single call by supplying
a character vector to the map functions.
* You can remove a level of the hierarchy with the flatten functions.
* You can flip levels of the hierarchy with the transpose function.
### Extracting deeply nested elements
Some times you get data structures that are very deeply nested. A common source of sych data is JSON from a web API. I've previously downloaded a list of GitHub issues related to this book and saved it as `issues.json`. Now I'm going to load it into a list with jsonlite. By default `fromJSON()` tries to be helpful and simplifies the structure a little for you. Here I'm going to show you how to do it with purrr, so I set `simplifyVector = FALSE`:
```{r}
# From https://api.github.com/repos/hadley/r4ds/issues
issues <- jsonlite::fromJSON("issues.json", simplifyVector = FALSE)
```
There are eight issues, and each issue is a nested list:
```{r}
length(issues)
str(issues[[1]])
```
To work with this sort of data, you typically want to turn it into a data frame by extracting the related vectors that you're most interested in:
```{r}
issues %>% map_int("id")
issues %>% map_lgl("locked")
issues %>% map_chr("state")
```
You can use the same technique to extract more deeply nested structure. For example, imagine you want to extract the name and id of the user. You could do that in two steps:
```{r}
users <- issues %>% map("user")
users %>% map_chr("login")
users %>% map_int("id")
```
But by supplying a character _vector_ to `map_*`, you can do it in one:
```{r}
issues %>% map_chr(c("user", "login"))
issues %>% map_int(c("user", "id"))
```
### Removing a level of hierarchy
As well as indexing deeply into hierarchy, it's sometimes useful to flatten it. That's the job of the flatten family of functions: `flatten()`, `flatten_lgl()`, `flatten_int()`, `flatten_dbl()`, and `flatten_chr()`. In the code below we take a list of lists of double vectors, then flatten it to a list of double vectors, then to a double vector.
```{r}
x <- list(list(a = 1, b = 2), list(c = 3, d = 4))
str(x)
y <- flatten(x)
str(y)
flatten_dbl(y)
```
Graphically, that sequence of operations looks like:
```{r, echo = FALSE}
knitr::include_graphics("diagrams/lists-flatten.png")
````
Whenever I get confused about a sequence of flattening operations, I'll often draw a diagram like this to help me understand what's going on.
Base R has `unlist()`, but I recommend avoiding it for the same reason I recommend avoiding `sapply()`: it always succeeds. Even if your data structure accidentally changes, `unlist()` will continue to work silently the wrong type of output. This tends to create problems that are frustrating to debug.
### Switching levels in the hierarchy
Other times the hierarchy feels "inside out". You can use `transpose()` to flip the first and second levels of a list:
```{r}
x <- list(
x = list(a = 1, b = 3, c = 5),
y = list(a = 2, b = 4, c = 6)
)
x %>% str()
x %>% transpose() %>% str()
```
Graphically, this looks like:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-transpose.png")
```
You'll see an example of this in the next section, as `transpose()` is particularly useful in conjunction with adverbs like `safely()` and `quietly()`.
It's called transpose by analogy to matrices. When you subset a transposed matrix, you switch indices: `x[i, j]` is the same as `t(x)[j, i]`. It's the same idea when transposing a list, but the subsetting looks a little different: `x[[i]][[j]]` is equivalent to `transpose(x)[[j]][[i]]`. Similarly, a transpose is its own inverse so `transpose(transpose(x))` is equal to `x`.
Transpose is also useful when working with JSON apis. Many JSON APIs represent data frames in a row-based format, rather than R's column-based format. `transpose()` makes it easy to switch between the two:
```{r}
df <- dplyr::data_frame(x = 1:3, y = c("a", "b", "c"))
df %>% transpose() %>% str()
```
### Exercises
## Dealing with failure
When you do many operations on a list, sometimes one will fail. When this happens, you'll get an error message, and no output. This is annoying: why does one failure prevent you from accessing all the other successes? How do you ensure that one bad apple doesn't ruin the whole barrel?
In this section you'll learn how to deal this situation with a new function: `safely()`. `safely()` is an adverb: it takes a function (a verb) and returns a modified version. In this case, the modified function will never throw an error. Instead, it always returns a list with two elements:
1. `result` is original result. If there was an error, this will be `NULL`.
1. `error` is an error object. If the operation was successful this will be
`NULL`.
(You might be familiar with the `try()` function in base R. It's similar, but because it sometimes returns the original result and it sometimes returns an error object it's more difficult to work with.)
Let's illustrate this with a simple example: `log()`:
```{r}
safe_log <- safely(log)
str(safe_log(10))
str(safe_log("a"))
```
When the function succeeds the `result` element contains the result and the error element is `NULL`. When the function fails, the result element is `NULL` and the error element contains an error object.
`safely()` is designed to work with map:
```{r}
x <- list(1, 10, "a")
y <- x %>% map(safely(log))
str(y)
```
This would be easier to work with if we had two lists: one of all the errors and one of all the results. That's easy to get with `transpose()`.
```{r}
y <- y %>% transpose()
str(y)
```
It's up to you how to deal with the errors, but typically you'll either look at the values of `x` where `y` is an error or work with the values of y that are ok:
```{r}
is_ok <- y$error %>% map_lgl(is_null)
x[!is_ok]
y$result[is_ok] %>% flatten_dbl()
```
Purrr provides two other useful adverbs:
* Like `safely()`, `possibly()` always succeeds. It's simpler than `safely()`,
because you give it a default value to return when there is an error.
```{r}
x <- list(1, 10, "a")
x %>% map_dbl(possibly(log, NA_real_))
```
* `quietly()` performs a similar role to `safely()`, but instead of capturing
errors, it captures printed output, messages, and warnings:
```{r}
x <- list(1, -1)
x %>% map(quietly(log)) %>% str()
```
### Exercises
1. Challenge: read all the csv files in this directory. Which ones failed
and why?
```{r, eval = FALSE}
files <- dir("data", pattern = "\\.csv$")
files %>%
set_names(., basename(.)) %>%
map_df(safely(readr::read_csv), .id = "filename") %>%
```
## Parallel maps
So far we've mapped along a single list. But often you have mutliple related lists that you need iterate along in parallel. That's the job of the `map2()` and `pmap()` functions. For example, imagine you want to simulate some random normals with different means. You know how to do that with `map()`:
```{r}
mu <- list(5, 10, -3)
mu %>% map(rnorm, n = 10)
```
What if you also want to vary the standard deviation? You need to iterate along a vector of means and a vector of standard deviations in parallel. That's a job for `map2()` which works with two parallel sets of inputs:
```{r}
sigma <- list(1, 5, 10)
map2(mu, sigma, rnorm, n = 10)
```
`map2()` generates this series of function calls:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-map2.png")
```
The arguments that vary for each call come before the function name, and arguments that are the same for every function call come afterwards.
Like `map()`, `map2()` is just a wrapper around a for loop:
```{r}
map2 <- function(x, y, f, ...) {
out <- vector("list", length(x))
for (i in seq_along(x)) {
out[[i]] <- f(x[[i]], y[[i]], ...)
}
out
}
```
You could also imagine `map3()`, `map4()`, `map5()`, `map6()` etc, but that would get tedious quickly. Instead, purrr provides `pmap()` which takes a list of arguments. You might use that if you wanted to vary the mean, standard deviation, and number of samples:
```{r}
n <- list(1, 3, 5)
args1 <- list(n, mu, sigma)
args1 %>% pmap(rnorm) %>% str()
```
That looks like:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-pmap-unnamed.png")
```
However, instead of relying on position matching, it's better to name the arguments. This is more verbose, but it makes the code clearer.
```{r}
args2 <- list(mean = mu, sd = sigma, n = n)
args2 %>% pmap(rnorm) %>% str()
```
That generates longer, but safer, calls:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/lists-pmap-named.png")
```
Since the arguments are all the same length, it makes sense to store them in a data frame:
```{r}
params <- dplyr::data_frame(mean = mu, sd = sigma, n = n)
params$result <- params %>% pmap(rnorm)
params
```
As soon as your code gets complicated, I think a data frame is a good approach because it ensures that each column has a name and is the same length as all the other columns. We'll come back to this idea when we explore the intersection of dplyr, purr, and model fitting.
### Invoking different functions
There's one more step up in complexity - as well as varying the arguments to the function you might also vary the function itself:
```{r}
f <- c("runif", "rnorm", "rpois")
param <- list(
list(min = -1, max = 1),
list(sd = 5),
list(lambda = 10)
)
```
To handle this case, you can use `invoke_map()`:
```{r}
invoke_map(f, param, n = 5) %>% str()
```
```{r, echo = FALSE}
knitr::include_graphics("diagrams/lists-invoke.png")
```
The first argument is a list of functions or character vector of function names. The second argument is a list of lists giving the arguments that vary for each function. The subsequent arguments are passed on to every function.
You can use `dplyr::frame_data()` to make creating these matching pairs a little easier:
```{r, eval = FALSE}
# Needs dev version of dplyr
sim <- dplyr::frame_data(
~f, ~params,
"runif", list(min = -1, max = -1),
"rnorm", list(sd = 5),
"rpois", list(lambda = 10)
)
sim %>% dplyr::mutate(
samples = invoke_map(f, params, n = 10)
)
```
## Walk {#walk}
Walk is an alternative to map that you use when you want to call a function for its side effects, rather than for its return value. You typically do this because you want to render output to the screen or saving files to disk - the important thing is the action, not the return value. Here's a very simple example:
```{r}
x <- list(1, "a", 3)
x %>%
walk(print)
```
`walk()` is generally not that useful compared to `walk2()` or `pwalk()`. For example, if you had a list of plots and a vector of file names, you could use `pwalk()` to save each file to the corresponding location on disk:
```{r}
library(ggplot2)
plots <- mtcars %>%
split(.$cyl) %>%
map(~ggplot(., aes(mpg, wt)) + geom_point())
paths <- paste0(names(plots), ".pdf")
pwalk(list(paths, plots), ggsave, path = tempdir())
```
`walk()`, `walk2()` and `pwalk()` all invisibly return the `.x`, the first argument. This makes them suitable for use in the middle of pipelines.
## Predicates
Imagine we want to summarise each numeric column of a data frame. We could do it in two steps:
1. Find all numeric columns.
1. Sumarise summarise each column.
In code, that would look like:
```{r}
col_sum <- function(df, f) {
is_num <- df %>% map_lgl(is_numeric)
df[is_num] %>% map_dbl(f)
}
```
`is_numeric()` is a __predicate__: a function that returns either `TRUE` or `FALSE`. There are a number of of purrr functions designed to work specifically with predicates:
* `keep()` and `discard()` keeps/discards list elements where the predicate is
true.
* `head_while()` and `tail_while()` keep the first/last elements of a list until
you get the first element where the predicate is true.
* `some()` and `every()` determine if the predicate is true for any or all of
the elements.
* `detect()` and `detect_index()`
We could use `keep()` to simplify the summary function to:
```{r}
col_sum <- function(df, f) {
df %>%
keep(is.numeric) %>%
map_dbl(f)
}
```
I like this formulation because you can easily read the sequence of steps.
### Built-in predicates
Purrr comes with a number of predicate functions built-in:
| | lgl | int | dbl | chr | list | null |
|------------------|-----|-----|-----|-----|------|------|
| `is_logical()` | x | | | | | |
| `is_integer()` | | x | | | | |
| `is_double()` | | | x | | | |
| `is_numeric()` | | x | x | | | |
| `is_character()` | | | | x | | |
| `is_atomic()` | x | x | x | x | | |
| `is_list()` | | | | | x | |
| `is_vector()` | x | x | x | x | x | |
| `is_null()` | | | | | | x |
Compared to the base R functions, they only inspect the type of the object, not its attributes. This means they tend to be less suprising:
```{r}
is.atomic(NULL)
is_atomic(NULL)
is.vector(factor("a"))
is_vector(factor("a"))
```
Each predicate also comes with "scalar" and "bare" versions. The scalar version checks that the length is 1 and the bare version checks that the object is a bare vector with no S3 class.
```{r}
y <- factor(c("a", "b", "c"))
is_integer(y)
is_scalar_integer(y)
is_bare_integer(y)
```
### Exercises
1. A possible base R equivalent of `col_sum()` is:
```{r}
col_sum3 <- function(df, f) {
is_num <- sapply(df, is.numeric)
df_num <- df[, is_num]
sapply(df_num, f)
}
```
But it has a number of bugs as illustrated with the following inputs:
```{r, eval = FALSE}
df <- data.frame(z = c("a", "b", "c"), x = 1:3, y = 3:1)
# OK
col_sum3(df, mean)
# Has problems: don't always return numeric vector
col_sum3(df[1:2], mean)
col_sum3(df[1], mean)
col_sum3(df[0], mean)
```
What causes the bugs?
1. Carefully read the documentation of `is.vector()`. What does it actually
test for?

View File

@ -1,8 +1,3 @@
---
layout: default
title: Model assessment
---
# Model assessment
```{r setup-model, include=FALSE}

View File

@ -1,8 +1,3 @@
---
layout: default
title: Model visualisation
---
# Model visualisation
Gap minder

View File

@ -1,7 +1,4 @@
---
layout: default
title: Model
---
# Model
A model is a function that summarizes how the values of one variable vary in relation to the values of other variables. Models play a large role in hypothesis testing and prediction, but for the moment you should think of models just like you think of statistics. A statistic summarizes a *distribution* in a way that is easy to understand; and a model summarizes *covariation* in a way that is easy to understand. In other words, a model is just another way to describe data.
@ -11,7 +8,7 @@ This chapter will explain how to build useful models with R.
*Section 1* will show you how to build linear models, the most commonly used type of model. Along the way, you will learn R's model syntax, a general syntax that you can reuse with most of R's modeling functions.
*Section 2* will show you the best ways to use R's model output, which is often reguires additional wrangling.
*Section 2* will show you the best ways to use R's model output, which is often requires additional wrangling.
*Section 3* will teach you to build and interpret multivariate linear models, models that use more than one explanatory variable to explain the values of a response variable.
@ -36,7 +33,8 @@ library(broom)
Have you heard that a relationship exists between your height and your income? It sounds far-fetched---and maybe it is---but many people believe that taller people will be promoted faster and valued more for their work, an effect that increases their income. Could this be true?
Luckily, it is easy to measure a person's height, as well as their income (and a swath of other related variables), which means that we can collect data relevant to the question. In fact, the Bureau of Labor Statistics has been doing this in a controlled way for over 50 years. The BLS [National Longitudinal Surveys (NLS)](https://www.nlsinfo.org/) track the income, education, and life circumstances of a large cohort of Americans across several decades. In case you are wondering, the point of the NLS is not to study the relationship between height and income, that's just a lucky accident.
Luckily, it is easy to measure someone's height, as well as their income (and a swath of other variables besides), which means that we can collect data relevant to the question. In fact, the Bureau of Labor Statistics has been doing this in a controlled way for over 50 years. The BLS [National Longitudinal Surveys (NLS)](https://www.nlsinfo.org/) track the income, education, and life circumstances of a large cohort of Americans across several decades. In case you are wondering, the point of the NLS is not to study the relationship between height and income, that's just a lucky accident.
You can load the latest cross-section of NLS data, collected in 2013 with the code below.
@ -46,10 +44,10 @@ heights <- readRDS("data/heights.RDS")
I've narrowed the data down to 10 variables:
* `id` - A number ot identify each subject
* `id` - A number to identify each subject
* `income` - The self-reported income of each subject
* `height` - The height of each subject in inches
* `weight` - The weight of each subject in inches
* `weight` - The weight of each subject in pounds
* `sex` - The sex of each subject
* `race` - The race of each subject
* `education` - The number of years of education completed by each subject
@ -68,7 +66,7 @@ ggplot(data = heights, mapping = aes(x = height, y = income)) +
geom_point()
```
First, let's address a distraction: the data is censored in an odd way. The y variable is income, which means that there are no y values less than zero. That's not odd. However, there are also no y values above $180,331. In fact, there are a line of unusual values at exactly $180,331. This is because the Burea of Labor Statistics removed the top 2% of income values and replaced them with the mean value of the top 2% of values, an action that was not designed to enhance the usefulness of the data.
First, let's address a distraction: the data is censored in an odd way. The y variable is income, which means that there are no y values less than zero. That's not odd. However, there are also no y values above $180,331. In fact, there are a line of unusual values at exactly $180,331. This is because the Bureau of Labor Statistics removed the top 2% of income values and replaced them with the mean value of the top 2% of values, an action that was not designed to enhance the usefulness of the data for data science.
Also, you can see that heights have been rounded to the nearest inch.
@ -108,9 +106,9 @@ cor(heights$height, heights$income, use = "na")
A model describes the relationship between two or more variables. There are multiple ways to describe any relationship. Which is best?
A common choice: decide form of relationship, then minimize residuals.
A common choice: decide the form of the relationship, then minimize residuals.
Use R's `lm()` function to fit a linear model to your data. The first argument of `lm()` should be a formula, two or more varibles separated by a `~`. You've seen forumlas before, we used them in Chapter 2 to facet graphs.
Use R's `lm()` function to fit a linear model to your data. The first argument of `lm()` should be a formula, two or more variables separated by a `~`. You've seen formulas before, we used them in Chapter 2 to facet graphs.
```{r}
income ~ height
@ -143,7 +141,7 @@ lm(income ~ 0 + height, data = heights)
## Using model output
R model output is not very tidy. It is designed to provide a data store that you can extract information from with helper functions.
R's model output is not very tidy. It is designed to provide a data store from which you can extract information with helper functions.
```{r}
coef(h)
@ -151,7 +149,7 @@ predict(h)[1:5]
resid(h)[1:5]
```
The `broom` package provides the most useful helper functions for working with R models. `broom` functions return the most useful model information as a data frames, which lets you quickly embed the information into your data science workflow.
The `broom` package provides the most useful helper functions for working with R models. `broom` functions return the most useful model information as data frames, which lets you quickly embed the information into your data science workflow.
### tidy()
@ -185,7 +183,7 @@ There appears to be a relationship between a person's education and how poorly t
Patterns in the residuals suggest that relationships exist between y and other variables, even when the effect of heights is accounted for.
Add variables to a model by adding variables to the righthand side of the model formula.
Add variables to a model by adding variables to the right-hand side of the model formula.
```{r}
income ~ height + education
@ -195,7 +193,7 @@ tidy(he)
### Interpretation
The coefficient of each variable displays the change of income that is associated with a one unit change in the variable _when all other variables are held constant_.
The coefficient of each variable represents the increase in income associated with a one unit increase in the variable _when all other variables are held constant_.
### Interaction effects
@ -226,7 +224,7 @@ Each level of the factor (i.e. unique value) is encoded as an integer and displa
If you use factors outside of a model, you will notice some limiting behavior:
* You cannot add values to a factor that do not appear in its levels attribute
* You cannot add to a factor values that do not appear in its levels attribute
* factors retain all of their levels attribute when you subset them. To avoid this use `drop = TRUE`.
```{r}
fac[1]
@ -238,7 +236,7 @@ num_fac <- factor(1:3, levels = 1:3, labels = c("100", "200", "300"))
num_fac
as.numeric(num_fac)
```
To coerce these labels to a different data type, first coerce the factor to a charater string with `as.character()`
To coerce these labels to a different data type, first coerce the factor to a character string with `as.character()`
```{r}
as.numeric(as.character(num_fac))
```
@ -325,6 +323,7 @@ ns() # natural splines
```
```{r}
library(splines)
tidy(lm(income ~ ns(education, knots = c(10, 17)), data = heights))
tidy(lm(income ~ ns(education, df = 4), data = heights))
```
@ -341,7 +340,7 @@ ggplot(data = heights, mapping = aes(x= education, y = income)) +
```{r}
gam(income ~ s(education), data = heights)
ggplot(data = heights, mapping = aes(x= education, y = income)) +
ggplot(data = heights, mapping = aes(x = education, y = income)) +
geom_point() +
geom_smooth(method = gam, formula = y ~ s(x))
```
@ -362,7 +361,7 @@ gam(y ~ s(x, z), data = df)
We've avoided two things in this chapter that are usually conflated with models: hypothesis testing and predictive analysis.
There are other types of modeling algorithms; each provides a valid description about the data.
There are other types of modeling algorithms; each provides a valid description of the data.
Which description will be best? Does the relationship have a known form? Does the data have a known structure? Are you going to attempt hypothesis testing that imposes its own constraints?

260
pipes.Rmd Normal file
View File

@ -0,0 +1,260 @@
# Pipes
```{r, include = FALSE}
library(dplyr)
diamonds <- ggplot2::diamonds
```
Pipes let you transform the way you call deeply nested functions. Using a pipe doesn't affect what the code does; behind the scenes it is run in (almost) exactly the same way. What the pipe does is change how you write, and read, code.
You've been using the pipe for a while now, so you already understand the basics. The point of this chapter is to explore the pipe in more detail. You'll learn the alternatives that the pipe replaces, and the pros and cons of the pipe. Importantly, you'll also learn situations in which you should avoid the pipe.
The pipe, `%>%`, comes from the magrittr package by Stefan Milton Bache. This package provides a handful of other helpful tools if you explicitly load it. We'll explore some of those tools to close out the chapter.
## Piping alternatives
The point of the pipe is to help you write code in a way that easier to read and understand. To see why the pipe is so useful, we're going to explore a number of ways of writing the same code. Let's use code to tell a story about a little bunny named Foo Foo:
> Little bunny Foo Foo
> Went hopping through the forest
> Scooping up the field mice
> And bopping them on the head
We'll start by defining an object to represent little bunny Foo Foo:
```{r, eval = FALSE}
foo_foo <- little_bunny()
```
And we'll use a function for each key verb: `hop()`, `scoop()`, and `bop()`. Using this object and these verbs, there are (at least) four ways we could retell the story in code:
1. Save each intermediate step as a new object.
1. Overwrite the original object many times.
1. Compose functions.
1. Use the pipe.
We'll work through each approach, showing you the code and talking about the advantages and disadvantages.
### Intermediate steps
The simplest approach is to save each step as a new object:
```{r, eval = FALSE}
foo_foo_1 <- hop(foo_foo, through = forest)
foo_foo_2 <- scoop(foo_foo_1, up = field_mice)
foo_foo_3 <- bop(foo_foo_2, on = head)
```
The main downside of this form is that it forces you to name each intermediate element. If there are natural names, this form feels natural, and you should use it. But in this example, there aren't natural names, and we're adding numeric suffixes just to make the names unique. That leads to two problems: the code is cluttered with unimportant names, and you have to be carefully increment the suffix on each line. Whenever I write code like this, I invariably use the wrong number on one line and then spend 10 minutes scratching my head and trying to figure out what went wrong with my code.
You may worry that this form creates many intermediate copies of your data and takes up a lot of memory. First, worrying about memory is not a useful way to spend your time: worry about it when it becomes a problem (i.e. you run out of memory), not before. Second, R isn't stupid: if you're working with data frames, R will share columns where possible. Let's take a look at an actual data manipulation pipeline where we add a new column to the `diamonds` dataset from ggplot2:
```{r}
diamonds2 <- mutate(diamonds, price_per_carat = price / carat)
library(pryr)
object_size(diamonds)
object_size(diamonds2)
object_size(diamonds, diamonds2)
```
`pryr::object_size()` gives the memory occupied by all of its arguments. The results seem counterintuitive at first:
* `diamonds` takes up 3.46 MB,
* `diamonds2` takes up 3.89 MB,
* `diamonds` and `diamonds2` together take up 3.89 MB!
How can that work? Well, `diamonds2` has 10 columns in common with `diamonds`: there's no need to duplicate all that data, so the two data frames have variables in common. These variables will only get copied if you modify one of them.
In the following example, we modify a single value in `diamonds$carat`. That means the `carat` variable can no longer be shared between the two data frames, and a copy must be made. The individual size of each data frame will be unchanged, but the collective size increases:
```{r}
diamonds$carat[1] <- NA
object_size(diamonds)
object_size(diamonds2)
object_size(diamonds, diamonds2)
```
(Note that we use `pryr::object_size()` here, not the built-in `object.size()`. `object.size()` isn't quite smart enough to recognise that the columns are shared across multiple data frames.)
### Overwrite the original
Instead of creating intermediate objects at each step, we could overwrite the original object:
```{r, eval = FALSE}
foo_foo <- hop(foo_foo, through = forest)
foo_foo <- scoop(foo_foo, up = field_mice)
foo_foo <- bop(foo_foo, on = head)
```
This is less typing (and less thinking), so you're less likely to make mistakes. However, there are two problems:
1. Debugging is painful: if you make a mistake you'll need to re-run the
complete pipeline from the beginning.
1. The repetition of the object being transformed (we've written `foo_foo` six
times!) obscures what's changing on each line.
### Function composition
Another approach is to abandon assignment and just string the function calls together:
```{r, eval = FALSE}
bop(
scoop(
hop(foo_foo, through = forest),
up = field_mice
),
on = head
)
```
Here the disadvantage is that you have to read from inside-out, from right-to-left, and that the arguments end up spread far apart (evocatively called the
[dagwood sandwhich](https://en.wikipedia.org/wiki/Dagwood_sandwich) problem).
### Use the pipe
Finally, we can use the pipe:
```{r, eval = FALSE}
foo_foo %>%
hop(through = forest) %>%
scoop(up = field_mouse) %>%
bop(on = head)
```
This is my favourite form, because it focusses on verbs, not nouns. You can read this series of function compositions like it's a set of imperative actions. Foo foo hops, then scoops, then bops. The downside, of course, is that you need to be familiar with the pipe. If you've never seen `%>%` before, you'll have no idea what this code does. Fortunately, however, most people pick up the idea very quickly, so when you share you code with others who aren't familiar with the pipe, you can easily teach them.
The pipe works by doing "lexical transformation". Behind the scenes, magrittr reassemble the code in the pipe to a form that works by overwriting an intermediate object. When you run a pipe like the one above, magrittr does something like this:
```{r, eval = FALSE}
my_pipe <- function(.) {
. <- hop(., through = forest)
. <- scoop(., up = field_mice)
bop(., on = head)
}
my_pipe(foo_foo)
```
This means that the pipe won't work for two classes of functions:
1. Functions that use the current environment. For example, `assign()`
will create a new variable with the given name in the current environment:
```{r}
assign("x", 10)
x
"x" %>% assign(100)
x
```
The use of assign with the pipe does not work because it assigns it to
a temporary environment used by `%>%`. If you do want to use assign with the
pipe, you must be explicit about the environment:
```{r}
env <- environment()
"x" %>% assign(100, envir = env)
x
```
Other functions with this problem include `get()` and `load()`
1. Functions that make use lazy evaluation. In R, function arguments
are only computed when the function uses them, not prior to calling the
function. This means that the function can affect the global environment in
various ways. The pipe computed each element in turn, so you can't
rely on this behaviour.
One place that this is a problem is `tryCatch()`, which lets you capture
and handle errors:
```{r, error = TRUE}
tryCatch(stop("!"), error = function(e) "An error")
stop("!") %>%
tryCatch(error = function(e) "An error")
```
There are a relatively wide class of functions with this behaviour.
This includes `try()`, `supressMessages()`, and `suppressWarnings()`
in base R.
## When not to use the pipe
The pipe is a powerful tool, but it's not the only tool at your disposal, and it doesn't solve every problem! Pipes are most useful for rewriting a fairly short linear sequence of operations. I think you should reach for another tool when:
* Your pipes get longer than five or six lines. In that case, create
intermediate objects with meaningful names. That will make debugging easier,
because you can more easily check the intermediate results, and it makes
it easier to understand your code, because the variable names can help
communicate intent.
* You have multiple inputs or outputs. If there isn't one primary object
being transformed, but two or more objects being combined together,
don't use the pipe.
* You are starting to think about a directed graph with a complex
dependency structure. Pipes are fundamentally linear and expressing
complex relationships with them will typically yield confusing code.
## Other tools from magrittr
The pipe is provided by the magrittr package, by Stefan Milton Bache. Most of packages you work in this book automatically provide `%>%` for you. You might want to load magrittr yourself if you're using another package, or you want to access some of the other pipe variants that magrittr provides.
```{r}
library(magrittr)
```
* When working with more complex pipes, it's some times useful to call a
function for its side-effects. Maybe you want to print out the current
object, or plot it, or save it to disk. Many times, such functions don't
return anything, effectively terminating the pipe.
To work around this problem, you can use the "tee" pipe. `%T>%` works like
`%>%` except that it returns the LHS instead of the RHS. It's called
"tee" because it's like a literal T-shaped pipe.
```{r}
rnorm(100) %>%
matrix(ncol = 2) %>%
plot() %>%
str()
rnorm(100) %>%
matrix(ncol = 2) %T>%
plot() %>%
str()
```
* If you're working with functions that don't have a data frame based API
(i.e. you pass them individual vectors, not a data frame and expressions
to be evaluated in the context of that data frame), you might find `%$%`
useful. It "explodes" out the variables in a data frame so that you can
refer to them explicitly. This is useful when working with many functions
in base R:
```{r}
mtcars %$%
cor(disp, mpg)
```
* For assignment magrittr provides the `%<>%` operator which allows you to
replace code like:
```R
mtcars <- mtcars %>% transform(cyl = cyl * 2)
```
with
```R
mtcars %<>% transform(cyl = cyl * 2)
```
I'm not a fan of this operator because I think assignment is such a
special operation that it should always be clear when it's occurring.
In my opinion, a little bit of duplication (i.e. repeating the
name of the object twice), is fine in return for making assignment
more explicit.

View File

@ -1,6 +1,51 @@
---
layout: default
title: Save time by programming
---
# Programming
Computer-human communication matters.
Code is a tool of communication, not just to the computer, but to other people. This is important because every project you undertake is fundamentally collaborative. Even if you're not working with other people, you'll definitely be working with future-you. You want to write clear code so that future-you doesn't curse present-you when you look at a project again after several months have passed.
To me, improving your communication skills is a key part of mastering R as a programming language. Over time, you want your code to become more and more clear, and easier to write. Removing duplication is an important part of expressing yourself clearly because it lets the reader (i.e. future-you!) focus on what's different between operations rather than what's the same. The goal is not just to write better functions or to do things that you couldn't do before, but to code with more "ease". As you internalise the ideas in this chapter, you should find it easier to re-tackle problems that you've solved in the past with much effort.
In the following chapters, you'll learn important programming skills:
1. We'll start by diving deep into the __pipe__, `%>%`, talking more about how
it works, what the alternatives are, and when not to use the pipe.
1. Copy-and-paste is powerful tool, but you should avoid doing it more than
twice. Repeating yourself in code is dangerous because it can easily lead
to errors and inconsistencies. Instead, write __functions__ which let
you extract out repeated code so that it can be easily reused.
1. Functions extract out repeated code, but you often need to repeat the
same actions on multiple inputs. You need tools for __iteration__ that
let you do similar things again again. These tools include for loops
and functional programming.
1. As you start to write more powerful functions, you'll need a solid
grounding in R's data structures. You must master the four common atomic
vectors, the three important S3 classes built on top of them, and
understand the mysteries of the list and data frame.
1. One of the partiuclarly important data structures in R is the list.
Lists are important because a list can contain other lists, so is
__hierarchical__. Two common scenarios where hierarchical structures
arise are json, and fitting many models. You'll need to learn some new
tools from the purrr package to make handling these cases as easy as
possible.
The goal of these chapters is to teach you the minimum about programming that a practicising data scientist must know. It turns out this is a reasonable amount, and I think it's worth investing in your programming skills. It's an investment that won't pay off immediately, but over time it will allow you to solve new problems more quickly, and reuse your insights from previous problems in new scenarios.
Writing code is similar in many ways to writing prose. One parallel which I find particularly useful is that in both cases rewriting is key to clarity. The first expression of your ideas is unlikely to be particularly clear, and you may need to rewrite multiple times. After solving a data analysis challenge, it's often worth looking at your code and thinking about whether or not it's obvious what you've done. If you spend a little time rewriting your code while the ideas are fresh, you can save a lot of time later trying to recreate what your code did. But this doesn't mean you should rewrite every function: you need to balance what you need to achieve now with saving time in the long run. (But the more you rewrite your functions the more likely you'll first attempt will be clear.)
## Learning more
As you become a better R programmer, you'll learn more techniques for reducing various types of duplication. This allows you to do more with less, and allows you to express yourself more clearly by taking advantage of powerful programming constructs.
To learn more you need to study R as a programming language, not just an interactive environment for data science. We have written two books that will help you do so:
* [Hands on programming with R](http://shop.oreilly.com/product/0636920028574.do),
by Garrett Grolemund. This is an introduction to R as a programming language
and is a great place to start if R is your first programming language.
* [Advanced R](http://adv-r.had.co.nz) by Hadley Wickham. This dives into the
details of R the programming language. This is a great place to start if
you've programmed in other languages and you want to learn what makes R
special, different, and particularly well suited to data analysis.

View File

@ -15,6 +15,5 @@ LaTeX: XeLaTeX
AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
BuildType: Custom
CustomScriptPath: _build.sh

4
r4ds.css Normal file
View File

@ -0,0 +1,4 @@
.book .book-header h1 {
opacity: 1;
text-align: left;
}

View File

@ -1,49 +1,41 @@
---
layout: default
title: Relational data
---
# Relational data {#relation-data}
```{r setup-relation, include = FALSE}
library(dplyr)
library(nycflights13)
library(ggplot2)
source("common.R")
options(dplyr.print_min = 6, dplyr.print_max = 6)
knitr::opts_chunk$set(fig.path = "figures/", cache = TRUE)
```
It's rare that a data analysis involves only a single table of data. Typically you have many tables of data, and you must combine them to answer the questions that you're interested in. Collectively, multiple tables of data are called __relational data__ because it is the relations, not just the individual datasets, that are particularly important.
Relations are always defined between a pair of tables. All other relations are built up from this simple idea: the relations of three or more tables are always a property of the relations between each pair; sometimes both elements of a pair can be the same table.
Relations are always defined between a pair of tables. All other relations are built up from this simple idea: the relations of three or more tables are always a property of the relations between each pair; sometimes both elements of a pair can be the same table.
To work with relational data you need verbs that work with pairs of tables. There are three families of verbs design to work with relational data:
To work with relational data you need verbs that work with pairs of tables. There are three families of verbs designed to work with relational data:
* __Mutating joins__, which add new variables to one data frame from matching
* __Mutating joins__, which add new variables to one data frame from matching
rows in another.
* __Filtering joins__, which filter observations from one data frame based on
* __Filtering joins__, which filter observations from one data frame based on
whether or not they match an observation in the other table.
* __Set operations__, which treat observations like they were set elements.
The most common place to find relational data is in a _relational_ database management system, a term that encompasses almost all modern databases. If you've used a database before, you've almost certainly used SQL. If so, you should find the concepts in this chapter familiar, although their expression in dplyr is little different. Generally, dplyr is a little easier to use than SQL because it's specialised to data analysis: it makes common data analysis operations easier, at the expense of making it difficult to do other things.
The most common place to find relational data is in a _relational_ database management system, a term that encompasses almost all modern databases. If you've used a database before, you've almost certainly used SQL. If so, you should find the concepts in this chapter familiar, although their expression in dplyr is a little different. Generally, dplyr is a little easier to use than SQL because it's specialised to data analysis: it makes common data analysis operations easier, at the expense of making it difficult to do other things.
## nycflights13 {#nycflights13-relational}
You'll learn about relational data with other datasets from the nycflights13 package. As well as the `flights` table that you've worked with so far, nycflights13 contains a four related data frames:
You'll learn about relational data with other datasets from the nycflights13 package. As well as the `flights` table that you've worked with so far, nycflights13 contains four other related data frames:
* `airlines` lets you look up the full carrier name from its abbreviated
* `airlines` lets you look up the full carrier name from its abbreviated
code:
```{r}
airlines
```
* `airports` gives information about each airport, identified by the `faa`
airport code:
```{r}
airports
```
@ -53,7 +45,7 @@ You'll learn about relational data with other datasets from the nycflights13 pac
```{r}
planes
```
* `weather` gives the weather at each NYC airport for each hour:
```{r}
@ -66,16 +58,16 @@ One way to show the relationships between the different tables is with a drawing
knitr::include_graphics("diagrams/relational-nycflights.png")
```
This diagram is a little overwhelming, and even so it's simple compared to some you'll see in the wild! The key to understanding diagrams like this is to remember each relation always concerns a pair of tables. You don't need to understand the whole thing; you just need the understand the chain of relations between the tables that you are interested in.
This diagram is a little overwhelming, and even so it's simple compared to some you'll see in the wild! The key to understanding diagrams like this is to remember each relation always concerns a pair of tables. You don't need to understand the whole thing; you just need to understand the chain of relations between the tables that you are interested in.
For nycflights13:
* `flights` connects to `planes` via single variable, `tailnum`. `flights`
connect `airlines` with the `carrier` variable.
* `flights` connects to `airports` in two ways: via the `origin` or the
* `flights` connects to `airports` in two ways: via the `origin` or the
`dest`.
* `flights` connects to `weather` via `origin` (the location), and
`year`, `month`, `day` and `hour` (the time).
@ -87,7 +79,7 @@ For nycflights13:
1. I forgot to draw the a relationship between `weather` and `airports`.
What is the relationship and what should it look like in the diagram?
1. `weather` only contains information for the origin (NYC) airports. If
it contained weather records for all airports in the USA, what additional
relation would it define with `flights`?
@ -97,7 +89,7 @@ For nycflights13:
or reject this hypothesis using data.
1. We know that some days of the year are "special", and fewer people than
usual fly on them. How might you represent that data as a data frame?
usual fly on them. How might you represent that data as a data frame?
What would be the primary keys of that table? How would it connect to the
existing tables?
@ -108,11 +100,11 @@ The variables used to connect each pair of tables are called __keys__. A key is
There are two types of keys:
* A __primary key__ uniquely identifies an observation in its own table.
For example, `planes$tailnum` is a primary key because it uniquely identifies
For example, `planes$tailnum` is a primary key because it uniquely identifies
each plane.
* A __foreign key__ uniquely identifies an observation in another table.
For example, the `flights$tailnum` is a foregin key because it matches each
For example, the `flights$tailnum` is a foreign key because it matches each
flight to a unique plane.
A variable can be both part of primary key _and_ a foreign key. For example, `origin` is part of the `weather` primary key, and is also a foreign key for the `airport` table.
@ -124,36 +116,36 @@ planes %>% count(tailnum) %>% filter(n > 1)
weather %>% count(year, month, day, hour, origin) %>% filter(n > 1)
```
Sometimes a table does't have an explicit primary key: each row is an observation, but no combination of variables reliably identifies it. For example, what's the primary key in the `flights` table? You might think it would be the date plus the flight or tail number, but neither of those are unique:
Sometimes a table doesn't have an explicit primary key: each row is an observation, but no combination of variables reliably identifies it. For example, what's the primary key in the `flights` table? You might think it would be the date plus the flight or tail number, but neither of those are unique:
```{r}
flights %>% count(year, month, day, flight) %>% filter(n > 1)
flights %>% count(year, month, day, tailnum) %>% filter(n > 1)
```
When starting to work with this data, I had naively assumed that each flight number would be only used once per day: that would make it much easiser to communicate problems with a specific flight. Unfortunately that is not the case! If a table lacks a primary key, it's sometimes useful to add one with `row_number()`. That makes it easier to match observations if you've done some filtering and want to check back in with the original data. This is called a surrogate key.
When starting to work with this data, I had naively assumed that each flight number would be only used once per day: that would make it much easier to communicate problems with a specific flight. Unfortunately that is not the case! If a table lacks a primary key, it's sometimes useful to add one with `row_number()`. That makes it easier to match observations if you've done some filtering and want to check back in with the original data. This is called a surrogate key.
A primary key and the corresponding foreign key in another table form a __relation__. Relations are typically one-to-many. For example, each flight has one plane, but each plane has many flights. In other data, you'll occassionaly see a 1-to-1 relationship. You can think of this as a special case of 1-to-many. It's possible to model many-to-many relations with a many-to-1 relation plus a 1-to-many relation. For example, in this data there's a many-to-many relationship between airlines and airports: each airport flies to many airlines; each airport hosts many airlines.
A primary key and the corresponding foreign key in another table form a __relation__. Relations are typically one-to-many. For example, each flight has one plane, but each plane has many flights. In other data, you'll occasionally see a 1-to-1 relationship. You can think of this as a special case of 1-to-many. It's possible to model many-to-many relations with a many-to-1 relation plus a 1-to-many relation. For example, in this data there's a many-to-many relationship between airlines and airports: each airport flies to many airlines; each airport hosts many airlines.
### Exercises
1. Identify the keys in the following datasets
1. `Lahman::Batting`,
1. `Lahman::Batting`,
1. `babynames::babynames`
1. `nasaweather::atmos`
1. `fueleconomy::vehicles`
1. Draw a diagram illustrating the connections between the `Batting`,
`Master`, and `Salary` tables in the Lahman package. Draw another diagram
1. Draw a diagram illustrating the connections between the `Batting`,
`Master`, and `Salary` tables in the Lahman package. Draw another diagram
that shows the relationship between `Master`, `Managers`, `AwardsManagers`.
How would you characterise the relationship between the `Batting`,
How would you characterise the relationship between the `Batting`,
`Pitching`, and `Fielding` tables?
## Mutating joins {#mutating-joins}
The first tool we'll look at for combining a pair of tables is the __mutating join__. A mutating join allows you to combine variables from two tables. It first matches observations by their keys, then copies across variables from one table to the other.
The first tool we'll look at for combining a pair of tables is the __mutating join__. A mutating join allows you to combine variables from two tables. It first matches observations by their keys, then copies across variables from one table to the other.
Like `mutate()`, the join functions add variables to the right, so if you have a lot of variables already, the new variables won't get printed out. For these examples, we'll make it easier to see what's going on in the examples by creating a narrower dataset:
@ -161,19 +153,19 @@ Like `mutate()`, the join functions add variables to the right, so if you have a
(flights2 <- flights %>% select(year:day, hour, origin, dest, tailnum, carrier))
```
(When you're in RStudio, you can use `View()` to avoid this problem).
(When you're in RStudio, you can use `View()` to avoid this problem).
For example, imagine you want to add the full airline name to the `flights` data. You can combine the `airlines` and `carrier` data frames with `left_join()`:
```{r}
flights2 %>%
flights2 %>%
left_join(airlines, by = "carrier")
```
The result of joining airlines to flights is an additional variable: `carrier`. This is why I call this type of join a mutating join. In this case, you could have got to the same place using `mutate()` and basic subsetting:
```{r}
flights2 %>%
flights2 %>%
mutate(carrier = airlines$name[match(carrier, airlines$carrier)])
```
@ -243,7 +235,7 @@ Graphically, that looks like:
knitr::include_graphics("diagrams/join-outer.png")
```
The most commonly used join is the left join: you use this when ever you lookup additional data out of another table, becasuse it preserves the original observations even when there isn't a match. The left join should be your default join: use it unless you have a strong reason to prefer one of the others.
The most commonly used join is the left join: you use this whenever you lookup additional data out of another table, because it preserves the original observations even when there isn't a match. The left join should be your default join: use it unless you have a strong reason to prefer one of the others.
Another way to depict the different types of joins is with a Venn diagram:
@ -260,7 +252,7 @@ So far all the diagrams have assumed that the keys are unique. But that's not al
1. One table has duplicate keys. This is useful when you want to
add in additional information as there is typically a one-to-many
relationship.
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/join-one-to-many.png")
```
@ -275,14 +267,14 @@ So far all the diagrams have assumed that the keys are unique. But that's not al
left_join(x, y, by = "key")
```
1. Both tables have duplicate keys. This is usually an error because in
1. Both tables have duplicate keys. This is usually an error because in
neither table do the keys uniquely identify an observation. When you join
duplicated keys, you get all possible combinations, the Cartesian product:
```{r, echo = FALSE, out.width = "75%"}
knitr::include_graphics("diagrams/join-many-to-many.png")
```
```{r}
x <- data_frame(key = c(1, 2, 2, 3), val_x = paste0("x", 1:4))
y <- data_frame(key = c(1, 2, 2, 3), val_y = paste0("y", 1:4))
@ -293,37 +285,37 @@ So far all the diagrams have assumed that the keys are unique. But that's not al
So far, the pairs of tables have always been joined by a single variable, and that variable has the same name in both tables. That constraint was encoded by `by = "key"`. You can use other values for `by` to connect the tables in other ways:
* The default, `by = NULL`, uses all variables that appear in both tables,
the so called __natural__ join. For example, the flights and weather tables
* The default, `by = NULL`, uses all variables that appear in both tables,
the so called __natural__ join. For example, the flights and weather tables
match on their common variables: `year`, `month`, `day`, `hour` and
`origin`.
```{r}
flights2 %>% left_join(weather)
```
* A character vector, `by = "x"`. This is like a natural join, but uses only
some of the common variables. For example, `flights` and `planes` have
`year` variables, but they mean different things so we only want to join by
* A character vector, `by = "x"`. This is like a natural join, but uses only
some of the common variables. For example, `flights` and `planes` have
`year` variables, but they mean different things so we only want to join by
`tailnum`.
```{r}
flights2 %>% left_join(planes, by = "tailnum")
```
Note that the `year` variables (which appear in both input data frames,
but are not constrained to be equal) are disambiguated in the output with
but are not constrained to be equal) are disambiguated in the output with
a suffix.
* A named character vector: `by = c("a" = "b")`. This will
match variable `a` in table `x` to variable `y` in table `b`. The
match variable `a` in table `x` to variable `b` in table `y`. The
variables from `x` will be used in the output.
For example, if we want to draw a map we need to combine the flights data
with the airports data which contains the location (`lat` and `long`) of
each airport. Each flight has an origin and destination `airport`, so we
each airport. Each flight has an origin and destination `airport`, so we
need to specify which one we want to join to:
```{r}
flights2 %>% left_join(airports, c("dest" = "faa"))
flights2 %>% left_join(airports, c("origin" = "faa"))
@ -334,33 +326,33 @@ So far, the pairs of tables have always been joined by a single variable, and th
1. Compute the average delay by destination, then join on the `airports`
data frame so you can show the spatial distribution of delays. Here's an
easy way to draw a map of the United States:
```{r, include = FALSE}
airports %>%
semi_join(flights, c("faa" = "dest")) %>%
ggplot(aes(lon, lat)) +
airports %>%
semi_join(flights, c("faa" = "dest")) %>%
ggplot(aes(lon, lat)) +
borders("state") +
geom_point() +
coord_quickmap()
```
You might want to use the `size` or `colour` of the points to display
the average delay for each airport.
1. Is there a relationship between the age of a plane and its delays?
1. What weather conditions make it more likely to see a delay?
1. What happened on June 13 2013? Display the spatial pattern of delays,
and then use google to cross-reference with the weather.
and then use Google to cross-reference with the weather.
```{r, eval = FALSE, include = FALSE}
worst <- filter(not_cancelled, month == 6, day == 13)
worst %>%
group_by(dest) %>%
summarise(delay = mean(arr_delay), n = n()) %>%
filter(n > 5) %>%
inner_join(airports, by = c("dest" = "faa")) %>%
worst %>%
group_by(dest) %>%
summarise(delay = mean(arr_delay), n = n()) %>%
filter(n > 5) %>%
inner_join(airports, by = c("dest" = "faa")) %>%
ggplot(aes(lon, lat)) +
borders("state") +
geom_point(aes(size = n, colour = delay)) +
@ -369,7 +361,7 @@ So far, the pairs of tables have always been joined by a single variable, and th
### Other implementations
`base::merge()` can perform all four types of mutating join:
`base::merge()` can perform all four types of mutating join:
dplyr | merge
-------------------|-------------------------------------------
@ -385,17 +377,17 @@ SQL is the inspiration for dplyr's conventions, so the translation is straightfo
dplyr | SQL
-----------------------------|-------------------------------------------
`inner_join(x, y, by = "z")` | `SELECT * FROM x INNER JOIN y USING (z)`
`left_join(x, y, by = "z")` | `SELECT * FROM x LEFT OUTER JOIN USING (z)`
`right_join(x, y, by = "z")` | `SELECT * FROM x RIGHT OUTER JOIN USING (z)`
`full_join(x, y, by = "z")` | `SELECT * FROM x FULL OUTER JOIN USING (z)`
`left_join(x, y, by = "z")` | `SELECT * FROM x LEFT OUTER JOIN y USING (z)`
`right_join(x, y, by = "z")` | `SELECT * FROM x RIGHT OUTER JOIN y USING (z)`
`full_join(x, y, by = "z")` | `SELECT * FROM x FULL OUTER JOIN y USING (z)`
Note that "INNER" and "OUTER" are optional, and often ommitted.
Note that "INNER" and "OUTER" are optional, and often omitted.
Joining different variables between the tables, e.g. `inner_join(x, y, by = c("a" = "b"))` uses a slightly different syntax in SQL: `SELECT * FROM x INNER JOIN y ON x.a = y.b`. As this syntax suggests SQL supports a wide range of join types than dplyr because you can connect the tables using constraints other than equiality (sometimes called non-equijoins).
Joining different variables between the tables, e.g. `inner_join(x, y, by = c("a" = "b"))` uses a slightly different syntax in SQL: `SELECT * FROM x INNER JOIN y ON x.a = y.b`. As this syntax suggests SQL supports a wide range of join types than dplyr because you can connect the tables using constraints other than equality (sometimes called non-equijoins).
## Filtering joins {#filtering-joins}
Filtering joins match obserations in the same way as mutating joins, but affect the observations, not the variables. There are two types:
Filtering joins match observations in the same way as mutating joins, but affect the observations, not the variables. There are two types:
* `semi_join(x, y)` __keeps__ all observations in `x` that have a match in `y`.
* `anti_join(x, y)` __drops__ all observations in `x` that have a match in `y`.
@ -403,7 +395,7 @@ Filtering joins match obserations in the same way as mutating joins, but affect
Semi-joins are useful for matching filtered summary tables back to the original rows. For example, imagine you've found the top ten most popular destinations:
```{r}
top_dest <- flights %>%
top_dest <- flights %>%
count(dest, sort = TRUE) %>%
head(10)
top_dest
@ -429,7 +421,7 @@ Graphically, a semi-join looks like this:
knitr::include_graphics("diagrams/join-semi.png")
```
Only the existence of a match is important; it doesn't match what observation is matched. This means that filtering joins never duplicate rows like mutating joins do:
Only the existence of a match is important; it doesn't matter which observation is matched. This means that filtering joins never duplicate rows like mutating joins do:
```{r, echo = FALSE, out.width = "50%"}
knitr::include_graphics("diagrams/join-semi-many.png")
@ -444,57 +436,57 @@ knitr::include_graphics("diagrams/join-anti.png")
Anti-joins are are useful for diagnosing join mismatches. For example, when connecting `flights` and `planes`, you might be interested to know that there are many `flights` that don't have a match in `planes`:
```{r}
flights %>%
anti_join(planes, by = "tailnum") %>%
flights %>%
anti_join(planes, by = "tailnum") %>%
count(tailnum, sort = TRUE)
```
### Exercises
1. What does it mean for a flight to have a missing `tailnum`? What do the
1. What does it mean for a flight to have a missing `tailnum`? What do the
tail numbers that don't have a matching record in `planes` have in common?
(Hint: one variable explains ~90% of the problem.)
1. Find the 48 hours (over the course of the whole year) that have the worst
delays. Cross-reference it with the `weather` data. Can you see any
patterns?
delays. Cross-reference it with the `weather` data. Can you see any
patterns?
1. What does `anti_join(flights, airports, by = c("dest" = "faa"))` tell you?
What does `anti_join(airports, flights, by = c("dest" = "faa"))` tell you?
What does `anti_join(airports, flights, by = c("faa" = "dest"))` tell you?
## Join problems
The data you've been working with in this chapter has been cleaned up so that you'll have as few problems as possible. Your own data is unlikely to be so nice, so there are a few things that you should do with your own data to make your joins go smoothly.
1. Start by identifying the variables that form the primary key in each table.
You should usually do this based on your understand of the data, not
empirically by looking for a combination of variables that give a
You should usually do this based on your understanding of the data, not
empirically by looking for a combination of variables that give a
unique identifier. If you just look for variables without thinking about
what they mean, you might get (un)lucky and find a combination that's
unique in your current data but the relationship might not be true in
general.
what they mean, you might get (un)lucky and find a combination that's
unique in your current data but the relationship might not be true in
general.
```{r}
airports %>% count(alt, lat) %>% filter(n > 1)
```
1. Check that none of the variables in the primary key are missing. If
1. Check that none of the variables in the primary key are missing. If
a value is missing then it can't identify an observation!
1. Check that your foreign keys match primary keys in another table. The
best way to do this is with an `anti_join()`. It's common for keys
not to match because of data entry errors. Fixing these is often a lot of
work.
If you do have missing keys, you'll need to be thoughtful about your
work.
If you do have missing keys, you'll need to be thoughtful about your
use of inner vs. outer joins, carefully considering whether or not you
want to drop rows that don't have a match.
Be aware that simply checking the number of rows before and after the join is not sufficient to ensure that your join has gone smoothly. If you have an inner join with duplicate keys in both tables, you might get unlikely at the number of dropped rows might exactly equal the number of duplicated rows!
Be aware that simply checking the number of rows before and after the join is not sufficient to ensure that your join has gone smoothly. If you have an inner join with duplicate keys in both tables, you might get unlucky as the number of dropped rows might exactly equal the number of duplicated rows!
## Set operations {#set-operations}
The final type of two-table verb is set operations. Generally, I use these the least frequently, but they are occassionally useful when you want to break a single complex filter into simpler pieces that you then combine.
The final type of two-table verb is set operations. Generally, I use these the least frequently, but they are occasionally useful when you want to break a single complex filter into simpler pieces that you then combine.
All these operations work with a complete row, comparing the values of every variable. These expect the `x` and `y` inputs to have the same variables, and treat the observations like sets:

View File

@ -1,8 +1,3 @@
---
layout: default
title: R Markdown
---
# R Markdown
Recommendations for learning more about communication:

View File

@ -1,11 +1,10 @@
---
layout: default
title: Robust code
---
```{r, include = FALSE}
library(magrittr)
```
## Robust code
# Robust code
(This is an advanced topic. You shouldn't worry too much about it when you first start writing functions. Instead you should focus on getting a function that works right for the easiest 80% of the problem. Then in time, you'll learn how to get to 99% with minimal extra effort. The defaults in this book should steer you in the right direction: we avoid teaching you functions with major suprises.)
(This is an advanced topic. You shouldn't worry too much about it when you first start writing functions. Instead you should focus on getting a function that works right for the easiest 80% of the problem. Then in time, you'll learn how to get to 99% with minimal extra effort. The defaults in this book should steer you in the right direction: we avoid teaching you functions with major surprises.)
In this section you'll learn an important principle that lends itself to reliable and readable code: favour code that can be understood with a minimum of context. On one extreme, take this code:
@ -19,13 +18,13 @@ What does it do? You can glean only a little from the context: `foo()` is a func
df2 <- arrange(df, qux)
```
It's now much easier to see what's going on! Function and variable names are important because they tell you about (or at least jog your memory of) what the code does. That helps you understand code in isolation, even if you don't completely understand all the details. Unfortunately naming things is hard, and its hard to give concrete advice apart from giving objects short but evocative names. As autocomplete in RStudio has gotten better, I've tended to use longer names that are more descriptive. Short names are faster to type, but you write code relatively infrequently compared to the number of times that you read it.
It's now much easier to see what's going on! Function and variable names are important because they tell you about (or at least jog your memory of) what the code does. That helps you understand code in isolation, even if you don't completely understand all the details. Unfortunately naming things is hard, and it's hard to give concrete advice apart from giving objects short but evocative names. As autocomplete in RStudio has gotten better, I've tended to use longer names that are more descriptive. Short names are faster to type, but you write code relatively infrequently compared to the number of times that you read it.
The idea of minimising the context needed to understand your code goes beyond just good naming. You also want to favour functions with predictable behaviour and few surprises. If a function does radically different things when its inputs differ slightly, you'll need to carefully read the surrounding context in order to predict what it will do. The goal of this section is to educate you about the most common ways R functions can be surprising and to provide you with unsurprising alternatives.
There are three common classes of surprises in R:
1. Unstable types: What what will `df[, x]` return? You can assume that `df`
1. Unstable types: What will `df[, x]` return? You can assume that `df`
is a data frame and `x` is a vector because of their names. But you don't
know whether this code will return a data frame or a vector because the
behaviour of `[` depends on the length of x.
@ -43,7 +42,7 @@ You might notice that these issues revolve around data frames. That's unfortunat
```{r}
df <- data.frame(xy = c("x", "y"))
# Character vectors work hard to work with for a long time, so R
# Character vectors were hard to work with for a long time, so R
# helpfully converts to a factor for you:
class(df$xy)
@ -68,7 +67,7 @@ df$x
### Unpredictable types
One of the most frustrating for programming is they way `[` returns a vector if the result has a single column, and returns a data frame otherwise. In other words, if you see code like `df[x, ]` you can't predict what it will return without knowing the value of `x`. This can trip you up in surprising ways. For example, imagine you've written this function to return the last row of a data frame:
One of the aspects most frustrating for programming is that `[` returns a vector if the result has a single column, and returns a data frame otherwise. In other words, if you see code like `df[x, ]` you can't predict what it will return without knowing the value of `x`. This can trip you up in surprising ways. For example, imagine you've written this function to return the last row of a data frame:
```{r}
last_row <- function(df) {
@ -117,7 +116,7 @@ df[3:4] %>% sapply(class) %>% str()
In the next chapter, you'll learn about the purrr package which provides a variety of alternatives. In this case, you could use `map_chr()` which always returns a character vector: if it can't, it will throw an error. Another option is the base `vapply()` function which takes a third argument indicating what the output should look like.
This doesn't make `sapply()` bad and `vapply()` and `map_chr()` good. `sapply()` is nice because you can use it interactively without having to think about what `f` will return. 95% of the time it will do the right thing, and if it doesn't you can quickly fix it. `map_chr()` is more important when your programming because a clear error message is more valuable when an operation is buried deep inside a tree of function calls. At this point its worth thinking more about
This doesn't make `sapply()` bad and `vapply()` and `map_chr()` good. `sapply()` is nice because you can use it interactively without having to think about what `f` will return. 95% of the time it will do the right thing, and if it doesn't you can quickly fix it. `map_chr()` is more important when you're programming because a clear error message is more valuable when an operation is buried deep inside a tree of function calls. At this point it's worth thinking more about
### Non-standard evaluation
@ -185,11 +184,11 @@ big_x <- function(df, threshold) {
}
```
Because dplyr currently has no way to force a name to be interpreted as either a local or parent variable, as I've only just realised that's really you should avoid NSE. In a future version you should be able to do:
Because dplyr currently has no way to force a name to be interpreted as either a local or parent variable, as I've only just realised, that's really why you should avoid NSE. In a future version you should be able to do:
```{r}
big_x <- function(df, threshold) {
dplyr::filter(df, .this$x > .parent$threshold)
dplyr::filter(df, local(x) > parent(threshold))
}
```
@ -213,7 +212,7 @@ Functions are easiest to reason about if they have two properties:
The first property is particularly important. If a function has hidden additional inputs, it's very difficult to even know where the important context is!
The biggest breaker of this rule in base R are functions that create data frames. Most of these functions have a `stringsAsFactors` argument that defaults to `getOption("stringsAsFactors")`. This means that a global option affects the operation of a very large number of functions, and you need to be aware that depending on an external state a function might produce either a character vector or a factor. In this book, we steer you away from that problem by recommnding functions like `readr::read_csv()` and `dplyr::data_frame()` that don't rely on this option. But be aware of it! Generally if a function is affected by a global option, you should avoid setting it.
The biggest breakers of this rule in base R are functions that create data frames. Most of these functions have a `stringsAsFactors` argument that defaults to `getOption("stringsAsFactors")`. This means that a global option affects the operation of a very large number of functions, and you need to be aware that, depending on an external state, a function might produce either a character vector or a factor. In this book, we steer you away from that problem by recommending functions like `readr::read_csv()` and `dplyr::data_frame()` that don't rely on this option. But be aware of it! Generally if a function is affected by a global option, you should avoid setting it.
Only use `options()` to control side-effects of a function. The value of an option should never affect the return value of a function. There are only three violations of this rule in base R: `stringsAsFactors`, `encoding`, `na.action`. For example, base R lets you control the number of digits printed in default displays with (e.g.) `options(digits = 3)`. This is a good use of an option because it's something that people frequently want control over, but doesn't affect the computation of a result, just its display. Follow this principle with your own use of options.

View File

@ -1,6 +1,3 @@
---
layout: default
title: Do science with data
---
# Do science with data
The scientific method guides data science. Data science solves known problems with the scientific method.

Binary file not shown.

After

Width:  |  Height:  |  Size: 189 KiB

View File

@ -1,6 +1 @@
---
layout: default
title: Shiny
---
# Shiny

View File

@ -1,11 +1,6 @@
---
layout: default
title: Strings
---
# Strings
```{r setup-strings, include = FALSE}
```{r setup-strings, include = FALSE, cache = FALSE}
library(stringr)
common <- rcorpora::corpora("words/common")$commonWords
@ -37,7 +32,7 @@ single_quote <- '\'' # or "'"
That means if you want to include a literal `\`, you'll need to double it up: `"\\"`.
Beware that the printed representation of the string is not the same as string itself, because the printed representation shows the escapes. To see the raw contents of the string, use writeLines()`:
Beware that the printed representation of the string is not the same as string itself, because the printed representation shows the escapes. To see the raw contents of the string, use `writeLines()`:
```{r}
x <- c("\"", "\\")
@ -45,7 +40,7 @@ x
writeLines(x)
```
There are a handful of other special characters. The most common used are `"\n"`, new line, and `"\t"`, tab, but you can see the complete list by requesting help on `"`: `?'"'`, or `?"'"`. You'll also sometimes strings like `"\u00b5"`, this is a way of writing non-English characters that works on all platforms:
There are a handful of other special characters. The most common used are `"\n"`, newline, and `"\t"`, tab, but you can see the complete list by requesting help on `"`: `?'"'`, or `?"'"`. You'll also sometimes see strings like `"\u00b5"`, this is a way of writing non-English characters that works on all platforms:
```{r}
x <- "\u00b5"
@ -54,7 +49,7 @@ x
### String length
Base R contains many functions to work with strings but we'll generally avoid them because they're inconsistent and hard to remember. Their behaviour is particularly inconsistent when it comes to missing values. For examle, `nchar()`, which gives the length of a string, returns 2 for `NA` (instead of `NA`)
Base R contains many functions to work with strings but we'll generally avoid them because they're inconsistent and hard to remember. Their behaviour is particularly inconsistent when it comes to missing values. For example, `nchar()`, which gives the length of a string, returns 2 for `NA` (instead of `NA`)
```{r}
# Bug will be fixed in R 3.3.0
@ -147,7 +142,7 @@ x
### Locales
Above I used`str_to_lower()` to change to lower case. You can also use `str_to_upper()` or `str_to_title()`. However, changing case is more complicated than it might at first seem because different languages have different rules for changing case. You can pick which set of rules to use by specifying a locale:
Above I used `str_to_lower()` to change to lower case. You can also use `str_to_upper()` or `str_to_title()`. However, changing case is more complicated than it might at first seem because different languages have different rules for changing case. You can pick which set of rules to use by specifying a locale:
```{r}
# Turkish has two i's: with and without a dot, and it
@ -158,7 +153,7 @@ str_to_upper(c("i", "ı"), locale = "tr")
The locale is specified as ISO 639 language codes, which are two or three letter abbreviations. If you don't already know the code for your language, [Wikipedia](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) has a good list. If you leave the locale blank, it will use the current locale.
Another important operation that's affected by the locale is sorting. The base R `order()` and `sort()` functions sort strings using the currect locale. If you want robust behaviour across different computers, you may want to use `str_sort()` and `str_order()` which take an additional `locale` argument:
Another important operation that's affected by the locale is sorting. The base R `order()` and `sort()` functions sort strings using the current locale. If you want robust behaviour across different computers, you may want to use `str_sort()` and `str_order()` which take an additional `locale` argument:
```{r}
x <- c("apple", "eggplant", "banana")
@ -191,9 +186,9 @@ str_sort(x, locale = "haw") # Hawaiian
Regular expressions, regexps for short, are a very terse language that allow to describe patterns in strings. They take a little while to get your head around, but once you've got it you'll find them extremely useful.
To learn regular expressions, we'll use `str_show()` and `str_show_all()`. These functions take a character vector and a regular expression, and shows you how they match. We'll start with very simple regular expressions and then gradually get more and more complicated. Once you've mastered pattern matching, you'll learn how to apply those ideas with various stringr functions.
To learn regular expressions, we'll use `str_show()` and `str_show_all()`. These functions take a character vector and a regular expression, and show you how they match. We'll start with very simple regular expressions and then gradually get more and more complicated. Once you've mastered pattern matching, you'll learn how to apply those ideas with various stringr functions.
### Basics matches
### Basic matches
The simplest patterns match exact strings:
@ -202,7 +197,7 @@ x <- c("apple", "banana", "pear")
str_view(x, "an")
```
The next step up in complexity is `.`, which matches any character (except a new line):
The next step up in complexity is `.`, which matches any character (except a newline):
```{r, cache = FALSE}
str_view(x, ".a.")
@ -254,7 +249,7 @@ str_view(x, "^a")
str_view(x, "a$")
```
To remember which is which, try this mneomic which I learned from [Evan Misshula](https://twitter.com/emisshula/status/323863393167613953): if you begin with power (`^`), you end up with money (`$`).
To remember which is which, try this mnemonic which I learned from [Evan Misshula](https://twitter.com/emisshula/status/323863393167613953): if you begin with power (`^`), you end up with money (`$`).
To force a regular expression to only match a complete string, anchor it with both `^` and `$`.:
@ -289,7 +284,7 @@ You can also match the boundary between words with `\b`. I don't find I often us
There are number of other special patterns that match more than one character:
* `.`: any character apart from a new line.
* `.`: any character apart from a newline.
* `\d`: any digit.
* `\s`: any whitespace (space, tab, newline).
* `[abc]`: match a, b, or c.
@ -303,7 +298,7 @@ You can use _alternation_ to pick between one or more alternative patterns. For
str_view(c("abc", "xyz"), "abc|xyz")
```
Like with mathematical expression, if precedence ever gets confusing, use parentheses to make it clear what you want:
Like with mathematical expressions, if precedence ever gets confusing, use parentheses to make it clear what you want:
```{r, cache = FALSE}
str_view(c("grey", "gray"), "gr(e|a)y")
@ -315,7 +310,7 @@ str_view(c("grey", "gray"), "gr(e|a)y")
1. Start with a vowel.
1. That only contain constants. (Hint: thinking about matching
1. That only contain consonants. (Hint: thinking about matching
"not"-vowels.)
1. End with `ed`, but not with `eed`.
@ -330,7 +325,7 @@ str_view(c("grey", "gray"), "gr(e|a)y")
### Repetition
The next step up in power involves control how many times a pattern matches:
The next step up in power involves control over how many times a pattern matches:
* `?`: 0 or 1
* `+`: 1 or more
@ -348,12 +343,12 @@ By default these matches are "greedy": they will match the longest string possib
```{r}
```
Note that the precedence of these operators are high, so you can write: `colou?r` to match either American or British spellings. That means most uses will need parentheses, like `bana(na)+` or `ba(na){2,}`.
Note that the precedence of these operators is high, so you can write: `colou?r` to match either American or British spellings. That means most uses will need parentheses, like `bana(na)+` or `ba(na){2,}`.
#### Exercises
1. Describe in words what these regular expressions match:
(read carefully to see I'm using a regular expression or a string
(read carefully to see if I'm using a regular expression or a string
that defines a regular expression.)
1. `^.*$`
@ -364,12 +359,12 @@ Note that the precedence of these operators are high, so you can write: `colou?r
1. Create regular expressions to find all words that:
1. Have three or more vowels in a row.
1. Start with three consonants
1. Have two or more vowel-consontant pairs in a row.
1. Start with three consonants.
1. Have two or more vowel-consonant pairs in a row.
### Grouping and backreferences
You learned about parentheses earlier as a way to disambiguate complex expression. They do one other special thing: they also define numeric groups that you can refer to with _backreferences_, `\1`, `\2` etc.For example, the following regular expression finds all fruits that have a pair letters that's repeated.
You learned about parentheses earlier as a way to disambiguate complex expression. They do one other special thing: they also define numeric groups that you can refer to with _backreferences_, `\1`, `\2` etc.For example, the following regular expression finds all fruits that have a pair of letters that's repeated.
```{r, cache = FALSE}
str_view(fruit, "(..)\\1", match = TRUE)
@ -400,15 +395,15 @@ str_detect(c("grey", "gray"), "gr(?:e|a)y")
## Tools
Now that you've learned the basics of regular expression, it's time to learn how to apply to real problems. In this section you'll learn a wide array of stringr functions that let you:
Now that you've learned the basics of regular expressions, it's time to learn how to apply them to real problems. In this section you'll learn a wide array of stringr functions that let you:
* Determine which elements match a pattern.
* Find the positions of matches.
* Extract the content of matches.
* Replace matches with new values.
* How can you split a string into based on a match.
* How can you split a string based on a match.
Because regular expressions are so powerful, it's easy to try and solve every problem with a single regular expression. But since you're in a programming language, it's often easy to break the problem down into smaller pieces. If you find yourself getting stuck trying to create a single regexp that solves your problem, take a step back and think if you could break the problem down in to smaller pieces, solving each challenge before moving onto the next one.
Because regular expressions are so powerful, it's easy to try and solve every problem with a single regular expression. But since you're in a programming language, it's often easy to break the problem down into smaller pieces. If you find yourself getting stuck trying to create a single regexp that solves your problem, take a step back and think if you could break the problem down into smaller pieces, solving each challenge before moving onto the next one.
### Detect matches
@ -419,7 +414,7 @@ x <- c("apple", "banana", "pear")
str_detect(x, "e")
```
Remember that when you use a logical vector in a numeric context, `FALSE` becomes 0 and `TRUE` becomes 1. That makes `sum()` and `mean()` useful if you want answer questions about matches across a larger vector:
Remember that when you use a logical vector in a numeric context, `FALSE` becomes 0 and `TRUE` becomes 1. That makes `sum()` and `mean()` useful if you want to answer questions about matches across a larger vector:
```{r}
# How many common words start with t?
@ -438,7 +433,7 @@ no_vowels_2 <- str_detect(common, "^[^aeiou]+$")
all.equal(no_vowels_1, no_vowels_2)
```
The results are identical, but I think the first approach is significantly easier to understand. So if you find your regular expression is getting overly complicated, try breaking it up into smaller pieces, giving each piece a name, and then combining with logical operations.
The results are identical, but I think the first approach is significantly easier to understand. So if you find your regular expression is getting overly complicated, try breaking it up into smaller pieces, giving each piece a name, and then combining them with logical operations.
A common use of `str_detect()` is to select the elements that match a pattern. You can do this with logical subsetting, or the convenient `str_subset()` wrapper:
@ -468,7 +463,7 @@ Note the use of `str_view_all()`. As you'll shortly learn, many stringr function
### Exercises
1. For each of the following challenges, try solving it both a single
1. For each of the following challenges, try solving it by using both a single
regular expression, and a combination of multiple `str_detect()` calls.
1. Find all words that start or end with `x`.
@ -483,7 +478,7 @@ Note the use of `str_view_all()`. As you'll shortly learn, many stringr function
### Extract matches
To extract the actual text of a match, use `str_extract()`. To show that off, we're going to need a more complicated example. I'm going to use the [Harvard sentences](https://en.wikipedia.org/wiki/Harvard_sentences), which were designed to tested VOIP systems, but are also useful for practicing regexs.
To extract the actual text of a match, use `str_extract()`. To show that off, we're going to need a more complicated example. I'm going to use the [Harvard sentences](https://en.wikipedia.org/wiki/Harvard_sentences), which were designed to test VOIP systems, but are also useful for practicing regexes.
```{r}
length(sentences)
@ -543,7 +538,7 @@ str_extract_all(x, "[a-z]", simplify = TRUE)
### Grouped matches
Earlier in this chapter we talked about the use of parentheses for clarifying precedence and to use with backreferences when matching. You can also parentheses to extract parts of a complex match. For example, imagine we want to extract nouns from the sentences. As a heuristic, we'll look for any word that comes after "a" or "the". Defining a "word" in a regular expression is a little tricky. Here I use a sequence of at least one character that isn't a space.
Earlier in this chapter we talked about the use of parentheses for clarifying precedence and for backreferences when matching. You can also use parentheses to extract parts of a complex match. For example, imagine we want to extract nouns from the sentences. As a heuristic, we'll look for any word that comes after "a" or "the". Defining a "word" in a regular expression is a little tricky. Here I use a sequence of at least one character that isn't a space.
```{r}
noun <- "(a|the) ([^ ]+)"
@ -607,7 +602,7 @@ sentences %>%
#### Exercises
1. Replace all `/` in a string with `\`.
1. Replace all `/`'s in a string with `\`'s.
### Splitting
@ -619,7 +614,7 @@ sentences %>%
str_split(" ")
```
Because each component might contain a different number of pieces, this returns a list. If you're working with a length-1 vector, the easiest thing is to just extra the first element of the list:
Because each component might contain a different number of pieces, this returns a list. If you're working with a length-1 vector, the easiest thing is to just extract the first element of the list:
```{r}
"a|b|c|d" %>%
@ -635,7 +630,7 @@ sentences %>%
str_split(" ", simplify = TRUE)
```
You can also request a maximum number of pieces;
You can also request a maximum number of pieces:
```{r}
fields <- c("Name: Hadley", "County: NZ", "Age: 35")
@ -657,7 +652,7 @@ str_split(x, boundary("word"))[[1]]
1. Split up a string like `"apples, pears, and bananas"` into individual
components.
1. Why is it's better to split up by `boundary("word")` than `" "`?
1. Why is it better to split up by `boundary("word")` than `" "`?
1. What does splitting with an empty string (`""`) do?
@ -697,7 +692,7 @@ You can use the other arguments of `regex()` to control details of the match:
```
* `comments = TRUE` allows you to use comments and white space to make
complex regular expressions more understand. Space are ignored, as is
complex regular expressions more understandable. Spaces are ignored, as is
everything after `#`. To match a literal space, you'll need to escape it:
`"\\ "`.
@ -707,7 +702,7 @@ There are three other functions you can use instead of `regex()`:
* `fixed()`: matches exactly the specified sequence of bytes. It ignores
all special regular expressions and operates at a very low level.
This allows you to avoid complex escaping can be much faster than
This allows you to avoid complex escaping and can be much faster than
regular expressions:
```{r}
@ -732,7 +727,7 @@ There are three other functions you can use instead of `regex()`:
```
They render identically, but because they're defined differently,
`fixed()` does find a match. Instead, you can use `coll()`, defined
`fixed()` doesn't find a match. Instead, you can use `coll()`, defined
next to respect human character comparison rules:
```{r}
@ -764,12 +759,12 @@ There are three other functions you can use instead of `regex()`:
stringi::stri_locale_info()
```
The downside of `coll()` is because the rules for recognising which
The downside of `coll()` is speed; because the rules for recognising which
characters are the same are complicated, `coll()` is relatively slow
compared to `regex()` and `fixed()`.
* As you saw with `str_split()` you can use `boundary()` to match boundaries.
You can also use it with the other functions, all though
You can also use it with the other functions:
```{r, cache = FALSE}
x <- "This is a sentence."
@ -788,7 +783,7 @@ There are three other functions you can use instead of `regex()`:
There are a few other functions in base R that accept regular expressions:
* `apropos()` searchs all objects avaiable from the global environment. This
* `apropos()` searches all objects available from the global environment. This
is useful if you can't quite remember the name of the function.
```{r}
@ -796,7 +791,7 @@ There are a few other functions in base R that accept regular expressions:
```
* `dir()` lists all the files in a directory. The `pattern` argument takes
a regular expression and only return file names that match the pattern.
a regular expression and only returns file names that match the pattern.
For example, you can find all the rmarkdown files in the current
directory with:
@ -818,9 +813,9 @@ There are a few other functions in base R that accept regular expressions:
### The stringi package
stringr is built on top of the __stringi__ package. stringr is useful when you're learning because it exposes a minimal set of functions, that have been carefully picked to handle the most common string manipulation functions. stringi on the other hand is designed to be comprehensive. It contains almost every function you might ever need. stringi has `length(ls("package:stringi"))` functions to stringr's `length(ls("package:stringr"))`.
stringr is built on top of the __stringi__ package. stringr is useful when you're learning because it exposes a minimal set of functions, that have been carefully picked to handle the most common string manipulation functions. stringi on the other hand is designed to be comprehensive. It contains almost every function you might ever need. stringi has `r length(ls(getNamespace("stringi")))` functions to stringr's `r length(ls("package:stringr"))`.
So if you find yourself struggling to do something that doesn't seem natural in stringr, it's worth taking a look at stringi. The use of the two packages are very similar because stringr was designed to mimic stringi's interface. The main difference is the prefix: `str_` vs `stri_`.
So if you find yourself struggling to do something that doesn't seem natural in stringr, it's worth taking a look at stringi. The use of the two packages is very similar because stringr was designed to mimic stringi's interface. The main difference is the prefix: `str_` vs `stri_`.
### Encoding
@ -832,7 +827,7 @@ Complicated and fraught with difficulty. Best approach is to convert to UTF-8 as
Generally, you should fix encoding problems during the data import phase.
Detect encoding operates statistically, by comparing frequency of byte fragments across languages and encodings. Fundamentally heuristic and works better with larger amounts of text (i.e. a whole file, not a single string from that file).
Detect encoding operates statistically, by comparing frequency of byte fragments across languages and encodings. It's fundamentally heuristic and works better with larger amounts of text (i.e. a whole file, not a single string from that file).
```{r}
x <- "\xc9migr\xe9 cause c\xe9l\xe8bre d\xe9j\xe0 vu."

View File

@ -1,14 +1,9 @@
---
layout: default
title: Tidy data
---
# Tidy data
> "Tidy datasets are all alike but every messy dataset is messy in its
> own way." Hadley Wickham
Data science, at its heart, is a computer programming exercise. Data scientists use computers to store, transform, visualize, and model their data. Each computer program will expect your data to be organized in a predetermined way, which may vary from program to program. To be an effective data scientist, you will need to be able to reorganize your data to match the format required by your program.
Data science, at its heart, is a computer programming exercise. Data scientists use computers to store, transform, visualize, and model their data. Each computer program will expect your data to be organized in a predetermined way, which may vary from program to program. To be an effective data scientist, you will need to be able to reorganize your data to match the format required by your program.
In this chapter, you will learn the best way to organize your data for R, a task that we call data tidying. Tidying your data will save you hours of time and make your data much easier to visualize, transform, and model with R.
@ -79,7 +74,7 @@ At this point, you might think that tidy data is so obvious that it is trivial.
Tidy data works well with R because it takes advantage of R's traits as a vectorized programming language. Data structures in R are organized around vectors, and R's functions are optimized to work with vectors. Tidy data takes advantage of both of these traits.
Tidy data arranges values so that the relationships between variables in a data set will parallel the relationship between vectors in R's storage objects. R stores tabular data as a data frame, a list of atomic vectors arranged to look like a table. Each column in the table is an atomic vector in the list. In tidy data, each variable in the data set is assigned to its own column, i.e., its own vector in the data frame.
Tidy data arranges values so that the relationships between variables in a data set will parallel the relationship between vectors in R's storage objects. R stores tabular data as a data frame, a list of atomic vectors arranged to look like a table. Each column in the table is an atomic vector in the list. In tidy data, each variable in the data set is assigned to its own column, i.e., its own vector in the data frame.
```{r, echo = FALSE}
knitr::include_graphics("images/tidy-2.png")
@ -87,7 +82,7 @@ knitr::include_graphics("images/tidy-2.png")
*A data frame is a list of vectors that R displays as a table. When your data is tidy, the values of each variable fall in their own column vector.*
As a result, you can extract the all of the values of a variable in a tidy data set by extracting the column vector that contains the variable. You can do this easily with R's list syntax, i.e.
As a result, you can extract all the values of a variable in a tidy data set by extracting the column vector that contains the variable. You can do this easily with R's list syntax, i.e.
```{r}
table1$cases
@ -191,7 +186,7 @@ After you collect your input, you can calculate the rate.
```{r eval = FALSE}
# Data set four
cases <- c(table4$1999, table4$2000, table4$2001)
cases <- c(table4$1999, table4$2000, table4$2001)
population <- c(table5$1999, table5$2000, table5$2001)
cases / population * 10000
```
@ -214,7 +209,7 @@ The two most important functions in `tidyr` are `gather()` and `spread()`. Each
A key value pair is a simple way to record information. A pair contains two parts: a *key* that explains what the information describes, and a *value* that contains the actual information. So for example, this would be a key value pair:
Password: 0123456789
Password: 0123456789
`0123456789` is the value, and it is associated with the key `Password`.
@ -238,7 +233,7 @@ Data values form natural key value pairs. The value is the value of the pair and
Cases: 80488
Cases: 212258
Cases: 213766
However, the key value pairs would cease to be a useful data set because you no longer know which values belong to the same observation.
Every cell in a table of data contains one half of a key value pair, as does every column name. In tidy data, each cell will contain a value and each column name will contain a key, but this doesn't need to be the case for untidy data. Consider `table2`.
@ -247,7 +242,7 @@ Every cell in a table of data contains one half of a key value pair, as does eve
table2
```
In `table2`, the `key` column contains only keys (and not just because the column is labelled `key`). Conveniently, the `value` column contains the values associated with those keys.
In `table2`, the `key` column contains only keys (and not just because the column is labeled `key`). Conveniently, the `value` column contains the values associated with those keys.
You can use the `spread()` function to tidy this layout.
@ -269,7 +264,7 @@ knitr::include_graphics("images/tidy-8.png")
*`spread()` distributes a pair of key:value columns into a field of cells. The unique keys in the key column become the column names of the field of cells.*
You can see that `spread()` maintains each of the relationships expressed in the original data set. The output contains the four original variables, *country*, *year*, *population*, and *cases*, and the values of these variables are grouped according to the orginal observations. As a bonus, now the layout of these relationships is tidy.
You can see that `spread()` maintains each of the relationships expressed in the original data set. The output contains the four original variables, *country*, *year*, *population*, and *cases*, and the values of these variables are grouped according to the original observations. As a bonus, now the layout of these relationships is tidy.
`spread()` takes three optional arguments in addition to `data`, `key`, and `value`:
@ -367,7 +362,7 @@ You can also pass an integer or vector of integers to `sep`. `separate()` will i
separate(table3, year, into = c("century", "year"), sep = 2)
```
You can futher customize `separate()` with the `remove`, `convert`, and `extra` arguments:
You can further customize `separate()` with the `remove`, `convert`, and `extra` arguments:
- **`remove`** - Set `remove = FALSE` to retain the column of values that were separated in the final data frame.
- **`convert`** - By default, `separate()` will return new columns as character columns. Set `convert = TRUE` to convert new columns to double (numeric), integer, logical, complex, and factor columns with `type.convert()`.
@ -462,4 +457,4 @@ who <- spread(who, var, value)
who
```
The `who` data set is now tidy. It is far from sparkling (for example, it contains several redundant columns and many missing values), but it will now be much easier to work with in R.
The `who` data set is now tidy. It is far from sparkling (for example, it contains several redundant columns and many missing values), but it will now be much easier to work with in R.

BIN
toc.rds

Binary file not shown.

View File

@ -1,29 +0,0 @@
_main.Rmd:
- transform
- hierarchy
- walk
contribute.rmd: []
data-structures.Rmd: []
databases.Rmd: []
datetimes.Rmd: []
eda.Rmd: []
functions.Rmd: []
import.Rmd: []
index.rmd: toc
intro.Rmd: []
lists.Rmd:
- hierarchy
- walk
model-assess.Rmd: []
model-vis.Rmd: []
model.Rmd: []
rmarkdown.Rmd: []
shiny.Rmd: []
strings.Rmd: []
temp.Rmd: []
tidy.Rmd: []
transform.Rmd:
- transform
- join-by
- join-type
visualize.Rmd: []

View File

@ -1,20 +1,9 @@
---
layout: default
title: Transform
---
# Data transformation {#transform}
```{r setup-transform, include = FALSE}
library(dplyr)
library(nycflights13)
library(ggplot2)
source("common.R")
options(dplyr.print_min = 6, dplyr.print_max = 6)
knitr::opts_chunk$set(
fig.path = "figures/transform/",
cache = TRUE
)
```
Visualisation is an important tool for insight generation, but it is rare that you get the data in exactly the right form you need for visualisation. Often you'll need to create some new variables or summaries, or maybe you just want to rename the variables or reorder the observations in order to make the data a little easier to work with. You'll learn how to do all that (and more!) in this chapter which will teach you how to transform your data using the dplyr package.
@ -24,7 +13,7 @@ When working with data you must:
1. Figure out what you want to do.
1. Precisely describe what you want to do in such a way that the
compute can understand it (i.e. program it).
computer can understand it (i.e. program it).
1. Execute the program.
@ -67,7 +56,7 @@ It prints differently because it has a different "class" to usual data frames:
class(flights)
```
This is called a `tbl_df` (pronounced tibble diff) or a `data_frame` (pronounced "data underscore frame"; cf. `data dot frame`). Generally, however, we want worry about this relatively minor difference and will refer to everything as data frames.
This is called a `tbl_df` (pronounced "tibble diff") or a `data_frame` (pronounced "data underscore frame"; cf. `data dot frame`). Generally, however, we won't worry about this relatively minor difference and will refer to everything as data frames.
You'll learn more about how that works in data structures. If you want to convert your own data frames to this special case, use `as.data_frame()`. I recommend it for large data frames as it makes interactive exploration much less painful.
@ -83,7 +72,7 @@ There are two other important differences between tbl_dfs and data.frames:
* When you subset a tbl\_df with `[`, it always returns another tbl\_df.
Contrast this with a data frame: sometimes `[` returns a data frame and
sometimes it just returns a single column:
sometimes it just returns a single column (i.e. a vector):
```{r}
df1 <- data.frame(x = 1:3, y = 3:1)
@ -95,7 +84,7 @@ There are two other important differences between tbl_dfs and data.frames:
class(df2[, 1])
```
To extract a single column use `[[` or `$`:
To extract a single column from a tbl\_df use `[[` or `$`:
```{r}
class(df2[[1]])
@ -211,7 +200,7 @@ Multiple arguments to `filter()` are combined with "and". To get more complicate
filter(flights, month == 11 | month == 12)
```
Note the order isn't like English. This expression doesn't find on months that equal 11 or 12. Instead it finds all months that equal `11 | 12`, which is `TRUE`. In a numeric context (like here), `TRUE` becomes one, so this finds all flights in January, not November or December.
Note the order isn't like English. The following expression doesn't find on months that equal 11 or 12. Instead it finds all months that equal `11 | 12`, which is `TRUE`. In a numeric context (like here), `TRUE` becomes one, so this finds all flights in January, not November or December.
```{r, eval = FALSE}
filter(flights, month == 11 | 12)
@ -236,7 +225,7 @@ filter(flights, !(arr_delay > 120 | dep_delay > 120))
filter(flights, arr_delay <= 120, dep_delay <= 120)
```
Note that R has both `&` and `|` and `&&` and `||`. `&` and `|` are vectorised: you give them two vectors of logical values and they return a vector of logical values. `&&` and `||` are scalar operators: you give them individual `TRUE`s or `FALSE`s. They're used if `if` statements when programming. You'll learn about that later on.
Note that R has both `&` and `|` and `&&` and `||`. `&` and `|` are vectorised: you give them two vectors of logical values and they return a vector of logical values. `&&` and `||` are scalar operators: you give them individual `TRUE`s or `FALSE`s. They're used in `if` statements when programming. You'll learn about that later on.
Sometimes you want to find all rows after the first `TRUE`, or all rows until the first `FALSE`. The cumulative functions `cumany()` and `cumall()` allow you to find these values:
@ -393,7 +382,7 @@ rename(flights, tail_num = tailnum)
--------------------------------------------------------------------------------
This function works similarly to the `select` argument in `base::subset()`. Because the dplyr philosophy is to have small functions that do one thing well, it is its own function in dplyr.
The `select()` function works similarly to the `select` argument in `base::subset()`. Because the dplyr philosophy is to have small functions that do one thing well, it is its own function in dplyr.
--------------------------------------------------------------------------------
@ -535,7 +524,7 @@ ggplot(flights, aes(dep_sched %% 60)) + geom_histogram(binwidth = 1)
ggplot(flights, aes(air_time - airtime2)) + geom_histogram()
```
1. Currently `dep_time()` and `arr_time()` are convenient to look at, but
1. Currently `dep_time` and `arr_time` are convenient to look at, but
hard to compute with because they're not really continuous numbers.
Convert them to a more convenient representation of number of minutes
since midnight.
@ -566,7 +555,7 @@ by_day <- group_by(flights, year, month, day)
summarise(by_day, delay = mean(dep_delay, na.rm = TRUE))
```
Together `group_by()` and `summarise()` provide one of tools that you'll use most commonly when working with dplyr: grouped summaries. But before we go any further with this idea, we need to introduce a powerful new idea: the pipe.
Together `group_by()` and `summarise()` provide one of tools that you'll use most commonly when working with dplyr: grouped summaries. But before we go any further with this, we need to introduce a powerful new idea: the pipe.
### Combining multiple operations with the pipe
@ -609,7 +598,7 @@ delays <- flights %>%
dist = mean(distance, na.rm = TRUE),
delay = mean(arr_delay, na.rm = TRUE)
) %>%
filter(delay, count > 20, dest != "HNL")
filter(count > 20, dest != "HNL")
```
This focuses on the transformations, not what's being transformed, which makes the code easier to read. You can read it as a series of imperative statements: group, then summarise, then filter. As suggested by this reading, a good way to pronounce `%>%` when reading code is "then".
@ -774,7 +763,7 @@ Just using means, counts, and sum can get you a long way, but R provides many ot
```
* By position: `first(x)`, `nth(x, 2)`, `last(x)`. These work similarly to
`x[1]`, `x[length(x)]`, and `x[n]` but let you set a default value if that
`x[1]`, x[n], and `x[length(x)]` but let you set a default value if that
position does not exist (i.e. you're trying to get the 3rd element from a
group that only has two elements).

View File

@ -1,6 +1,3 @@
---
layout: default
title: Understand your data
---
# Understand your data
Data poses a cognitive problem; Data comprehension is a skill.

View File

@ -1,16 +1,7 @@
---
layout: default
title: Variation
---
# Variation
```{r, include = FALSE}
library(ggplot2)
knitr::opts_chunk$set(
cache = TRUE,
fig.path = "figures/variation/"
)
```
@ -60,7 +51,7 @@ One of the most useful tools in this quest are the values themselves, the values
The distribution of a variable reveals information about the probabilities associated with the variable. As you collect more data, the proportion of observations that occur at a value (or in an interval) will match the probability that the variable will take that value (or take a value in that interval) in a future measurement.
In theory, it is easy to visualize the distribution of a variable; simply display how many observations occur at each value of the variable. In practice, how you do this will depend on the type of variable that you wish to visualize.
In theory, it is easy to visualize the distribution of a variable: simply display how many observations occur at each value of the variable. In practice, how you do this will depend on the type of variable that you wish to visualize.
##### Discrete distributions
@ -122,25 +113,25 @@ The strategy of counting the number of observations at each value breaks down fo
To get around this, data scientists divide the range of a continuous variable into equally spaced intervals, a process called _binning_.
```{r, echo = FALSE}
knitr::include_graphics("images/visualization-17.png")
# knitr::include_graphics("images/visualization-17.png")
```
They then count how many observations fall into each bin.
```{r, echo = FALSE}
knitr::include_graphics("images/visualization-18.png")
# knitr::include_graphics("images/visualization-18.png")
```
And display the count as a bar, or some other object.
```{r, echo = FALSE}
knitr::include_graphics("images/visualization-19.png")
# knitr::include_graphics("images/visualization-19.png")
```
This method is temperamental because the appearance of the distribution can change dramatically if the bin size changes. As no bin size is "correct," you should explore several bin sizes when examining data.
```{r, echo = FALSE}
knitr::include_graphics("images/visualization-20.png")
# knitr::include_graphics("images/visualization-20.png")
```
Several geoms exist to help you visualize continuous distributions. They almost all use the "bin" stat to implement the above strategy. For each of these geoms, you can set the following arguments for "bin" to use:
@ -148,7 +139,7 @@ Several geoms exist to help you visualize continuous distributions. They almost
* `binwidth` - the width to use for the bins in the same units as the x variable
* `origin` - origin of the first bin interval
* `right` - if `TRUE` bins will be right closed (e.g. points that fall on the border of two bins will be counted with the bin to the left)
* `breaks` - a vector of actual bin breaks to use. If you set the breaks argument, it will overide the binwidth and origin arguments.
* `breaks` - a vector of actual bin breaks to use. If you set the breaks argument, it will override the binwidth and origin arguments.
Use `geom_histogram()` to make a traditional histogram. The height of each bar reveals how many observations fall within the width of the bar.
@ -164,7 +155,7 @@ ggplot(data = diamonds) +
geom_histogram(aes(x = carat), binwidth = 1)
```
Notice how different binwidths reveal different information. The plot above shows that the availability of diamonds decreases quickly as carat size increases. The plot below shows that there are more diamonds than you would expect at whole carat sizes (and common fractions of carat sizes). Moreover, for each popular size, there are more diamonds that are slightly larger than the size than there are that are slightly smaller than the size.
Notice how different binwidths reveal different information. The plot above shows that the availability of diamonds decreases quickly as carat size increases. The plot below shows that there are more diamonds than you would expect at whole carat sizes (and common fractions of carat sizes). Moreover, for each popular size, there are more diamonds slightly larger than the size than diamonds slightly smaller than the size.
```{r}
@ -297,7 +288,7 @@ You've probably heard that "correlation (covariation) does not prove causation."
Visualization is one of the best ways to spot covariation. How you look for covariation will depend on the structural relationship between two variables. The simplest structure occurs when two continuous variables have a functional relationship, where each value of one variable corresponds to a single value of the second variable.
In this scenario, covariation will appear as a pattern in the relationship. If two variables o not covary, their functional relationship will look like a random walk.
In this scenario, covariation will appear as a pattern in the relationship. If two variables do not covary, their functional relationship will look like a random walk.
The variables `date` and `unemploy` in the `economics` data set have a functional relationship. The `economics` data set comes with `ggplot2` and contains various economic indicators for the United States between 1967 and 2007. The `unemploy` variable measures the number of unemployed individuals in the United States in thousands.
@ -460,7 +451,7 @@ Control the appearance of the labels with the following arguments. You can also
* `hjust` - horizontal adjustment
* `vjust`- vertical adjustment
Scatterplots do not work well with large data sets because individual points will begin to occlude each other. As a result, you cannot tell where the mass of the data lies. Does a black region contain a single layer of points? Or hundreds of points stacked on top of each other.
Scatterplots do not work well with large data sets because individual points will begin to occlude each other. As a result, you cannot tell where the mass of the data lies. Does a black region contain a single layer of points? Or hundreds of points stacked on top of each other?
You can see this type of plotting in the `diamonds` data set. The data set only contains 53,940 points, but the points overplot each other in a way that we cannot fix with jittering.
@ -471,7 +462,7 @@ ggplot(data = diamonds) +
For large data, it is more useful to plot summary information that describes the raw data than it is to plot the raw data itself. Several geoms can help you do this.
The simplest way to summarize covariance between two variables is with a model line. The model line displays the trend of the relationship between the variables.
The simplest way to summarise covariance between two variables is with a model line. The model line displays the trend of the relationship between the variables.
Use `geom_smooth()` to display a model line between any two variables. As with `geom_rug()`, `geom_smooth()` works well as a second layer for a plot (See Section 3 for details).
@ -485,7 +476,7 @@ ggplot(data = diamonds) +
`geom_smooth()` will also plot a standard error band around the model line. You can remove the standard error band by setting the `se` argument of `geom_smooth()` to `FALSE`.
Use the `model` argument of `geom_smooth()` to adda specific type of model line to your data. `model` takes the name of an R modeling function. `geom_smooth()` will use the function to calculate the model line. For example, the code below uses R's `lm()` function to fit a linear model line to the data.
Use the `method` argument of `geom_smooth()` to add a specific type of model line to your data. `method` takes the name of an R modeling function. `geom_smooth()` will use the function to calculate the model line. For example, the code below uses R's `lm()` function to fit a linear model line to the data.
```{r}
ggplot(data = diamonds) +
@ -520,7 +511,7 @@ Useful arguments for `geom_smooth()` are:
* `level` - Confidence level to use for standard error ribbon
* `method` - Smoothing function to use, a model function in R
* `n` - The number of points to evaluate smoother at (defaults to 80)
* `se` - If TRUE` (the default), `geom_smooth()` will include a standard error ribbon
* `se` - If `TRUE` (the default), `geom_smooth()` will include a standard error ribbon
Be careful, `geom_smooth()` will overlay a trend line on every data set, even if the underlying data is uncorrelated. You can avoid being fooled by also inspecting the raw data or calculating the correlation between your variables, e.g. `cor(diamonds$carat, diamonds$price)`.
@ -549,7 +540,7 @@ Useful arguments for `geom_quantile()` are:
* `formula` - the formula to use in the smoothing function
* `quantiles` - Conditional quantiles of $y$ to display. Each quantile is displayed with a line.
`geom_smooth()` and `geom_quantile()` summarize the relationship between two variables as a function, but you can also summarize the relationship as a bivariate distribution.
`geom_smooth()` and `geom_quantile()` summarise the relationship between two variables as a function, but you can also summarise the relationship as a bivariate distribution.
`geom_bin2d()` divides the coordinate plane into a two dimensional grid and then displays the number of observations that fall into each bin in the grid. This technique let's you see where the mass of the data lies; bins with a light fill color contain more data than bins with a dark fill color. Bins with no fill contain no data at all.
@ -574,7 +565,7 @@ Useful arguments for `geom_bin2d()` are:
* `binwidth` - A vector like `c(0.1, 500)` that gives the binwidths to use in the horizontal and vertical directions. Overrides `bins` when set.
* `drop` - If `TRUE` (default) `geom_bin2d()` removes the fill from all bins that contain zero observations.
`geom_hex()` works similarly to `geom_bin2d()`, but it divides the coordinate plain into hexagon shaped bins. This can reduce visual artifacts that are introduced by the aligning edges of rectangular bins.
`geom_hex()` works similarly to `geom_bin2d()`, but it divides the coordinate plane into hexagon shaped bins. This can reduce visual artifacts that are introduced by the aligning edges of rectangular bins.
```{r}
ggplot(data = diamonds) +
@ -583,7 +574,7 @@ ggplot(data = diamonds) +
`geom_hex()` requires the `hexbin` package, which you can install with `install.packages("hexbin")`.
`geom_density2d()` uses density contours to display similar information. It is the two dimensional equivalent of `geom_density()`. Interpret a two dimensional density plot the same way you would interpret a contour map. Each line connects an area of equal density, which makes changes of slope easy to see.
`geom_density2d()` uses density contours to display similar information. It is the two dimensional equivalent of `geom_density()`. Interpret a two dimensional density plot the same way you would interpret a contour map. Each line connects points of equal density, which makes changes of slope easy to see.
As with other summary geoms, `geom_density2d()` makes a useful second layer.
@ -609,7 +600,7 @@ Useful arguments for `geom_density2d()` are:
##### Visualize correlations between three variables
There are two ways to add three (or more) variables to a two dimensional plot. You can map additional variables to aesthics within the plot, or you can use a geom that is designed to visualize three variables.
There are two ways to add three (or more) variables to a two dimensional plot. You can map additional variables to aesthetics within the plot, or you can use a geom that is designed to visualize three variables.
`ggplot2` provides three geoms that are designed to display three variables: `geom_raster()`, `geom_tile()` and `geom_contour()`. These geoms generalize `geom_bin2d()` and `geom_density()` to display a third variable instead of a count, or a density.

View File

@ -1,17 +1,5 @@
---
layout: default
title: Visualize
---
# Data visualisation
```{r setup-visualise, include = FALSE}
knitr::opts_chunk$set(
cache = TRUE,
fig.path = "figures/visualize/"
)
```
> "The simple graph has brought more information to the data analysts mind than any other device."---John Tukey
Visualization makes data decipherable. Consider what it is like to study a table of raw data. You can examine a couple of values at a time, but you cannot attend to many values at once. The data overloads your attention span, which makes it hard to spot patterns in the data. See this for yourself; can you spot the striking relationship between $X$ and $Y$ in the table below?
@ -708,19 +696,21 @@ ggplot(data = mpg) +
##### Exercises
1. What graph will this code make?
```{r eval = FALSE}
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_grid(drv ~ .)
```
1. What graph will this code make?
1. What graph will this code make?
```{r eval = FALSE}
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_grid(. ~ cyl)
```
```{r eval = FALSE}
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_grid(drv ~ .)
```
1. What graph will this code make?
```{r eval = FALSE}
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_grid(. ~ cyl)
```
### The layered grammar of graphics

View File

@ -1,6 +1,88 @@
---
layout: default
title: Work with your data
---
# Work with your data
With data, the relationships between values matter as much as the values themselves. Tidy data encodes those relationships.
Throughout this book we work with "tibbles" instead of the traditional data frame. Tibbles _are_ data frame but encode some patterns that make modern usage of R better. Unfortunately R is an old language, and things that made sense 10 or 20 years a go are no longer as valid. It's difficult to change base R without breaking existing code, so most innovation occurs in packages, providing new functions that you should use instead of the old ones.
```{r}
library(tibble)
```
## Creating tibbles
The majority of the functions that you'll use in this book already produce tibbles. But if you're working with functions from other packages, you might need to coerce a regular data frame a tibble. You can do that with `as_data_frame()`:
```{r}
as_data_frame(iris)
```
As well as data frames, this function also knows how to convert lists (provided the elements are equal length vectors), matrices, and tables.
You can also create a new tibble from individual vectors with `data_frame()`:
```{r}
data_frame(x = 1:5, y = 1, z = x ^ 2 + y)
```
`data_frame()` does much less than `data.frame()`: it never changes the type of the inputs (e.g. it never converts strings to factors!), it never changes the names of variables, and it never creates `row.names()`. You can read more about these features in the vignette, `vignette("tibble")`.
You can define a tibble row-by-row with `frame_data()`:
```{r}
frame_data(
~x, ~y, ~z,
"a", 2, 3.6,
"b", 1, 8.5
)
```
## Tibbles vs data frames
There are two main differences in the usage of a data frame vs a tibble: printing, and subsetting.
Tibbles have a refined print method that shows only the first 10 rows, and all the columns that fit on screen. This makes it much easier to work with large data. In addition to its name, each column reports its type, a nice feature borrowed from `str()`:
```{r}
library(nycflights13)
flights
```
You can control the default appearance with options:
* `options(tibble.print_max = n, tibble.print_min = m)`: if more than `m`
rows print `m` rows. Use `options(dplyr.print_max = Inf)` to always
show all rows.
* `options(tibble.width = Inf)` will always print all columns, regardless
of the width of the screen.
Tibbles are strict about subsetting. If you try to access a variable that does not exist, you'll get an error:
```{r, error = TRUE}
flights$yea
```
Tibbles also clearly delineate `[` and `[[`: `[` always returns another tibble, `[[` always returns a vector. No more `drop = FALSE`!
```{r}
class(iris[ , 1])
class(iris[ , 1, drop = FALSE])
class(as_data_frame(iris)[ , 1])
```
Contrast this with a data frame: sometimes `[` returns a data frame and
sometimes it just returns a single column:
```{r}
df1 <- data.frame(x = 1:3, y = 3:1)
class(df1[, 1:2])
class(df1[, 1])
```
## Interacting with legacy code
Some older functions don't work with tibbles because they expect `df[, 1]` to return a vector, not a data frame. If you encounter one of these functions, use `as.data.frame()` to turn a tibble back to a data frame:
```
class(as.data.frame(tbl_df(iris)))
```

1
www/.gitignore vendored
View File

@ -1 +0,0 @@
bootstrap-2.3.2

View File

@ -1,470 +0,0 @@
/*!
* Bootstrap v3.3.1 (http://getbootstrap.com)
* Copyright 2011-2014 Twitter, Inc.
* Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
*/
.btn-default,
.btn-primary,
.btn-success,
.btn-info,
.btn-warning,
.btn-danger {
text-shadow: 0 -1px 0 rgba(0, 0, 0, .2);
-webkit-box-shadow: inset 0 1px 0 rgba(255, 255, 255, .15), 0 1px 1px rgba(0, 0, 0, .075);
box-shadow: inset 0 1px 0 rgba(255, 255, 255, .15), 0 1px 1px rgba(0, 0, 0, .075);
}
.btn-default:active,
.btn-primary:active,
.btn-success:active,
.btn-info:active,
.btn-warning:active,
.btn-danger:active,
.btn-default.active,
.btn-primary.active,
.btn-success.active,
.btn-info.active,
.btn-warning.active,
.btn-danger.active {
-webkit-box-shadow: inset 0 3px 5px rgba(0, 0, 0, .125);
box-shadow: inset 0 3px 5px rgba(0, 0, 0, .125);
}
.btn-default .badge,
.btn-primary .badge,
.btn-success .badge,
.btn-info .badge,
.btn-warning .badge,
.btn-danger .badge {
text-shadow: none;
}
.btn:active,
.btn.active {
background-image: none;
}
.btn-default {
text-shadow: 0 1px 0 #fff;
background-image: -webkit-linear-gradient(top, #fff 0%, #e0e0e0 100%);
background-image: -o-linear-gradient(top, #fff 0%, #e0e0e0 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#fff), to(#e0e0e0));
background-image: linear-gradient(to bottom, #fff 0%, #e0e0e0 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff', endColorstr='#ffe0e0e0', GradientType=0);
filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
background-repeat: repeat-x;
border-color: #dbdbdb;
border-color: #ccc;
}
.btn-default:hover,
.btn-default:focus {
background-color: #e0e0e0;
background-position: 0 -15px;
}
.btn-default:active,
.btn-default.active {
background-color: #e0e0e0;
border-color: #dbdbdb;
}
.btn-default:disabled,
.btn-default[disabled] {
background-color: #e0e0e0;
background-image: none;
}
.btn-primary {
background-image: -webkit-linear-gradient(top, #337ab7 0%, #265a88 100%);
background-image: -o-linear-gradient(top, #337ab7 0%, #265a88 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#337ab7), to(#265a88));
background-image: linear-gradient(to bottom, #337ab7 0%, #265a88 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff265a88', GradientType=0);
filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
background-repeat: repeat-x;
border-color: #245580;
}
.btn-primary:hover,
.btn-primary:focus {
background-color: #265a88;
background-position: 0 -15px;
}
.btn-primary:active,
.btn-primary.active {
background-color: #265a88;
border-color: #245580;
}
.btn-primary:disabled,
.btn-primary[disabled] {
background-color: #265a88;
background-image: none;
}
.btn-success {
background-image: -webkit-linear-gradient(top, #5cb85c 0%, #419641 100%);
background-image: -o-linear-gradient(top, #5cb85c 0%, #419641 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#5cb85c), to(#419641));
background-image: linear-gradient(to bottom, #5cb85c 0%, #419641 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5cb85c', endColorstr='#ff419641', GradientType=0);
filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
background-repeat: repeat-x;
border-color: #3e8f3e;
}
.btn-success:hover,
.btn-success:focus {
background-color: #419641;
background-position: 0 -15px;
}
.btn-success:active,
.btn-success.active {
background-color: #419641;
border-color: #3e8f3e;
}
.btn-success:disabled,
.btn-success[disabled] {
background-color: #419641;
background-image: none;
}
.btn-info {
background-image: -webkit-linear-gradient(top, #5bc0de 0%, #2aabd2 100%);
background-image: -o-linear-gradient(top, #5bc0de 0%, #2aabd2 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#5bc0de), to(#2aabd2));
background-image: linear-gradient(to bottom, #5bc0de 0%, #2aabd2 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de', endColorstr='#ff2aabd2', GradientType=0);
filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
background-repeat: repeat-x;
border-color: #28a4c9;
}
.btn-info:hover,
.btn-info:focus {
background-color: #2aabd2;
background-position: 0 -15px;
}
.btn-info:active,
.btn-info.active {
background-color: #2aabd2;
border-color: #28a4c9;
}
.btn-info:disabled,
.btn-info[disabled] {
background-color: #2aabd2;
background-image: none;
}
.btn-warning {
background-image: -webkit-linear-gradient(top, #f0ad4e 0%, #eb9316 100%);
background-image: -o-linear-gradient(top, #f0ad4e 0%, #eb9316 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#f0ad4e), to(#eb9316));
background-image: linear-gradient(to bottom, #f0ad4e 0%, #eb9316 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff0ad4e', endColorstr='#ffeb9316', GradientType=0);
filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
background-repeat: repeat-x;
border-color: #e38d13;
}
.btn-warning:hover,
.btn-warning:focus {
background-color: #eb9316;
background-position: 0 -15px;
}
.btn-warning:active,
.btn-warning.active {
background-color: #eb9316;
border-color: #e38d13;
}
.btn-warning:disabled,
.btn-warning[disabled] {
background-color: #eb9316;
background-image: none;
}
.btn-danger {
background-image: -webkit-linear-gradient(top, #d9534f 0%, #c12e2a 100%);
background-image: -o-linear-gradient(top, #d9534f 0%, #c12e2a 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#d9534f), to(#c12e2a));
background-image: linear-gradient(to bottom, #d9534f 0%, #c12e2a 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffd9534f', endColorstr='#ffc12e2a', GradientType=0);
filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
background-repeat: repeat-x;
border-color: #b92c28;
}
.btn-danger:hover,
.btn-danger:focus {
background-color: #c12e2a;
background-position: 0 -15px;
}
.btn-danger:active,
.btn-danger.active {
background-color: #c12e2a;
border-color: #b92c28;
}
.btn-danger:disabled,
.btn-danger[disabled] {
background-color: #c12e2a;
background-image: none;
}
.thumbnail,
.img-thumbnail {
-webkit-box-shadow: 0 1px 2px rgba(0, 0, 0, .075);
box-shadow: 0 1px 2px rgba(0, 0, 0, .075);
}
.dropdown-menu > li > a:hover,
.dropdown-menu > li > a:focus {
background-color: #e8e8e8;
background-image: -webkit-linear-gradient(top, #f5f5f5 0%, #e8e8e8 100%);
background-image: -o-linear-gradient(top, #f5f5f5 0%, #e8e8e8 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#f5f5f5), to(#e8e8e8));
background-image: linear-gradient(to bottom, #f5f5f5 0%, #e8e8e8 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff5f5f5', endColorstr='#ffe8e8e8', GradientType=0);
background-repeat: repeat-x;
}
.dropdown-menu > .active > a,
.dropdown-menu > .active > a:hover,
.dropdown-menu > .active > a:focus {
background-color: #2e6da4;
background-image: -webkit-linear-gradient(top, #337ab7 0%, #2e6da4 100%);
background-image: -o-linear-gradient(top, #337ab7 0%, #2e6da4 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#337ab7), to(#2e6da4));
background-image: linear-gradient(to bottom, #337ab7 0%, #2e6da4 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff2e6da4', GradientType=0);
background-repeat: repeat-x;
}
.navbar-default {
background-image: -webkit-linear-gradient(top, #fff 0%, #f8f8f8 100%);
background-image: -o-linear-gradient(top, #fff 0%, #f8f8f8 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#fff), to(#f8f8f8));
background-image: linear-gradient(to bottom, #fff 0%, #f8f8f8 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffffffff', endColorstr='#fff8f8f8', GradientType=0);
filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
background-repeat: repeat-x;
border-radius: 4px;
-webkit-box-shadow: inset 0 1px 0 rgba(255, 255, 255, .15), 0 1px 5px rgba(0, 0, 0, .075);
box-shadow: inset 0 1px 0 rgba(255, 255, 255, .15), 0 1px 5px rgba(0, 0, 0, .075);
}
.navbar-default .navbar-nav > .open > a,
.navbar-default .navbar-nav > .active > a {
background-image: -webkit-linear-gradient(top, #dbdbdb 0%, #e2e2e2 100%);
background-image: -o-linear-gradient(top, #dbdbdb 0%, #e2e2e2 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#dbdbdb), to(#e2e2e2));
background-image: linear-gradient(to bottom, #dbdbdb 0%, #e2e2e2 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffdbdbdb', endColorstr='#ffe2e2e2', GradientType=0);
background-repeat: repeat-x;
-webkit-box-shadow: inset 0 3px 9px rgba(0, 0, 0, .075);
box-shadow: inset 0 3px 9px rgba(0, 0, 0, .075);
}
.navbar-brand,
.navbar-nav > li > a {
text-shadow: 0 1px 0 rgba(255, 255, 255, .25);
}
.navbar-inverse {
background-image: -webkit-linear-gradient(top, #3c3c3c 0%, #222 100%);
background-image: -o-linear-gradient(top, #3c3c3c 0%, #222 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#3c3c3c), to(#222));
background-image: linear-gradient(to bottom, #3c3c3c 0%, #222 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff3c3c3c', endColorstr='#ff222222', GradientType=0);
filter: progid:DXImageTransform.Microsoft.gradient(enabled = false);
background-repeat: repeat-x;
}
.navbar-inverse .navbar-nav > .open > a,
.navbar-inverse .navbar-nav > .active > a {
background-image: -webkit-linear-gradient(top, #080808 0%, #0f0f0f 100%);
background-image: -o-linear-gradient(top, #080808 0%, #0f0f0f 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#080808), to(#0f0f0f));
background-image: linear-gradient(to bottom, #080808 0%, #0f0f0f 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff080808', endColorstr='#ff0f0f0f', GradientType=0);
background-repeat: repeat-x;
-webkit-box-shadow: inset 0 3px 9px rgba(0, 0, 0, .25);
box-shadow: inset 0 3px 9px rgba(0, 0, 0, .25);
}
.navbar-inverse .navbar-brand,
.navbar-inverse .navbar-nav > li > a {
text-shadow: 0 -1px 0 rgba(0, 0, 0, .25);
}
.navbar-static-top,
.navbar-fixed-top,
.navbar-fixed-bottom {
border-radius: 0;
}
@media (max-width: 767px) {
.navbar .navbar-nav .open .dropdown-menu > .active > a,
.navbar .navbar-nav .open .dropdown-menu > .active > a:hover,
.navbar .navbar-nav .open .dropdown-menu > .active > a:focus {
color: #fff;
background-image: -webkit-linear-gradient(top, #337ab7 0%, #2e6da4 100%);
background-image: -o-linear-gradient(top, #337ab7 0%, #2e6da4 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#337ab7), to(#2e6da4));
background-image: linear-gradient(to bottom, #337ab7 0%, #2e6da4 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff2e6da4', GradientType=0);
background-repeat: repeat-x;
}
}
.alert {
text-shadow: 0 1px 0 rgba(255, 255, 255, .2);
-webkit-box-shadow: inset 0 1px 0 rgba(255, 255, 255, .25), 0 1px 2px rgba(0, 0, 0, .05);
box-shadow: inset 0 1px 0 rgba(255, 255, 255, .25), 0 1px 2px rgba(0, 0, 0, .05);
}
.alert-success {
background-image: -webkit-linear-gradient(top, #dff0d8 0%, #c8e5bc 100%);
background-image: -o-linear-gradient(top, #dff0d8 0%, #c8e5bc 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#dff0d8), to(#c8e5bc));
background-image: linear-gradient(to bottom, #dff0d8 0%, #c8e5bc 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffdff0d8', endColorstr='#ffc8e5bc', GradientType=0);
background-repeat: repeat-x;
border-color: #b2dba1;
}
.alert-info {
background-image: -webkit-linear-gradient(top, #d9edf7 0%, #b9def0 100%);
background-image: -o-linear-gradient(top, #d9edf7 0%, #b9def0 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#d9edf7), to(#b9def0));
background-image: linear-gradient(to bottom, #d9edf7 0%, #b9def0 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffd9edf7', endColorstr='#ffb9def0', GradientType=0);
background-repeat: repeat-x;
border-color: #9acfea;
}
.alert-warning {
background-image: -webkit-linear-gradient(top, #fcf8e3 0%, #f8efc0 100%);
background-image: -o-linear-gradient(top, #fcf8e3 0%, #f8efc0 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#fcf8e3), to(#f8efc0));
background-image: linear-gradient(to bottom, #fcf8e3 0%, #f8efc0 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffcf8e3', endColorstr='#fff8efc0', GradientType=0);
background-repeat: repeat-x;
border-color: #f5e79e;
}
.alert-danger {
background-image: -webkit-linear-gradient(top, #f2dede 0%, #e7c3c3 100%);
background-image: -o-linear-gradient(top, #f2dede 0%, #e7c3c3 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#f2dede), to(#e7c3c3));
background-image: linear-gradient(to bottom, #f2dede 0%, #e7c3c3 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff2dede', endColorstr='#ffe7c3c3', GradientType=0);
background-repeat: repeat-x;
border-color: #dca7a7;
}
.progress {
background-image: -webkit-linear-gradient(top, #ebebeb 0%, #f5f5f5 100%);
background-image: -o-linear-gradient(top, #ebebeb 0%, #f5f5f5 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#ebebeb), to(#f5f5f5));
background-image: linear-gradient(to bottom, #ebebeb 0%, #f5f5f5 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffebebeb', endColorstr='#fff5f5f5', GradientType=0);
background-repeat: repeat-x;
}
.progress-bar {
background-image: -webkit-linear-gradient(top, #337ab7 0%, #286090 100%);
background-image: -o-linear-gradient(top, #337ab7 0%, #286090 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#337ab7), to(#286090));
background-image: linear-gradient(to bottom, #337ab7 0%, #286090 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff286090', GradientType=0);
background-repeat: repeat-x;
}
.progress-bar-success {
background-image: -webkit-linear-gradient(top, #5cb85c 0%, #449d44 100%);
background-image: -o-linear-gradient(top, #5cb85c 0%, #449d44 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#5cb85c), to(#449d44));
background-image: linear-gradient(to bottom, #5cb85c 0%, #449d44 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5cb85c', endColorstr='#ff449d44', GradientType=0);
background-repeat: repeat-x;
}
.progress-bar-info {
background-image: -webkit-linear-gradient(top, #5bc0de 0%, #31b0d5 100%);
background-image: -o-linear-gradient(top, #5bc0de 0%, #31b0d5 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#5bc0de), to(#31b0d5));
background-image: linear-gradient(to bottom, #5bc0de 0%, #31b0d5 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff5bc0de', endColorstr='#ff31b0d5', GradientType=0);
background-repeat: repeat-x;
}
.progress-bar-warning {
background-image: -webkit-linear-gradient(top, #f0ad4e 0%, #ec971f 100%);
background-image: -o-linear-gradient(top, #f0ad4e 0%, #ec971f 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#f0ad4e), to(#ec971f));
background-image: linear-gradient(to bottom, #f0ad4e 0%, #ec971f 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff0ad4e', endColorstr='#ffec971f', GradientType=0);
background-repeat: repeat-x;
}
.progress-bar-danger {
background-image: -webkit-linear-gradient(top, #d9534f 0%, #c9302c 100%);
background-image: -o-linear-gradient(top, #d9534f 0%, #c9302c 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#d9534f), to(#c9302c));
background-image: linear-gradient(to bottom, #d9534f 0%, #c9302c 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffd9534f', endColorstr='#ffc9302c', GradientType=0);
background-repeat: repeat-x;
}
.progress-bar-striped {
background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent);
background-image: -o-linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent);
background-image: linear-gradient(45deg, rgba(255, 255, 255, .15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, .15) 50%, rgba(255, 255, 255, .15) 75%, transparent 75%, transparent);
}
.list-group {
border-radius: 4px;
-webkit-box-shadow: 0 1px 2px rgba(0, 0, 0, .075);
box-shadow: 0 1px 2px rgba(0, 0, 0, .075);
}
.list-group-item.active,
.list-group-item.active:hover,
.list-group-item.active:focus {
text-shadow: 0 -1px 0 #286090;
background-image: -webkit-linear-gradient(top, #337ab7 0%, #2b669a 100%);
background-image: -o-linear-gradient(top, #337ab7 0%, #2b669a 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#337ab7), to(#2b669a));
background-image: linear-gradient(to bottom, #337ab7 0%, #2b669a 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff2b669a', GradientType=0);
background-repeat: repeat-x;
border-color: #2b669a;
}
.list-group-item.active .badge,
.list-group-item.active:hover .badge,
.list-group-item.active:focus .badge {
text-shadow: none;
}
.panel {
-webkit-box-shadow: 0 1px 2px rgba(0, 0, 0, .05);
box-shadow: 0 1px 2px rgba(0, 0, 0, .05);
}
.panel-default > .panel-heading {
background-image: -webkit-linear-gradient(top, #f5f5f5 0%, #e8e8e8 100%);
background-image: -o-linear-gradient(top, #f5f5f5 0%, #e8e8e8 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#f5f5f5), to(#e8e8e8));
background-image: linear-gradient(to bottom, #f5f5f5 0%, #e8e8e8 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff5f5f5', endColorstr='#ffe8e8e8', GradientType=0);
background-repeat: repeat-x;
}
.panel-primary > .panel-heading {
background-image: -webkit-linear-gradient(top, #337ab7 0%, #2e6da4 100%);
background-image: -o-linear-gradient(top, #337ab7 0%, #2e6da4 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#337ab7), to(#2e6da4));
background-image: linear-gradient(to bottom, #337ab7 0%, #2e6da4 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ff337ab7', endColorstr='#ff2e6da4', GradientType=0);
background-repeat: repeat-x;
}
.panel-success > .panel-heading {
background-image: -webkit-linear-gradient(top, #dff0d8 0%, #d0e9c6 100%);
background-image: -o-linear-gradient(top, #dff0d8 0%, #d0e9c6 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#dff0d8), to(#d0e9c6));
background-image: linear-gradient(to bottom, #dff0d8 0%, #d0e9c6 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffdff0d8', endColorstr='#ffd0e9c6', GradientType=0);
background-repeat: repeat-x;
}
.panel-info > .panel-heading {
background-image: -webkit-linear-gradient(top, #d9edf7 0%, #c4e3f3 100%);
background-image: -o-linear-gradient(top, #d9edf7 0%, #c4e3f3 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#d9edf7), to(#c4e3f3));
background-image: linear-gradient(to bottom, #d9edf7 0%, #c4e3f3 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffd9edf7', endColorstr='#ffc4e3f3', GradientType=0);
background-repeat: repeat-x;
}
.panel-warning > .panel-heading {
background-image: -webkit-linear-gradient(top, #fcf8e3 0%, #faf2cc 100%);
background-image: -o-linear-gradient(top, #fcf8e3 0%, #faf2cc 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#fcf8e3), to(#faf2cc));
background-image: linear-gradient(to bottom, #fcf8e3 0%, #faf2cc 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#fffcf8e3', endColorstr='#fffaf2cc', GradientType=0);
background-repeat: repeat-x;
}
.panel-danger > .panel-heading {
background-image: -webkit-linear-gradient(top, #f2dede 0%, #ebcccc 100%);
background-image: -o-linear-gradient(top, #f2dede 0%, #ebcccc 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#f2dede), to(#ebcccc));
background-image: linear-gradient(to bottom, #f2dede 0%, #ebcccc 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#fff2dede', endColorstr='#ffebcccc', GradientType=0);
background-repeat: repeat-x;
}
.well {
background-image: -webkit-linear-gradient(top, #e8e8e8 0%, #f5f5f5 100%);
background-image: -o-linear-gradient(top, #e8e8e8 0%, #f5f5f5 100%);
background-image: -webkit-gradient(linear, left top, left bottom, from(#e8e8e8), to(#f5f5f5));
background-image: linear-gradient(to bottom, #e8e8e8 0%, #f5f5f5 100%);
filter: progid:DXImageTransform.Microsoft.gradient(startColorstr='#ffe8e8e8', endColorstr='#fff5f5f5', GradientType=0);
background-repeat: repeat-x;
border-color: #dcdcdc;
-webkit-box-shadow: inset 0 1px 3px rgba(0, 0, 0, .05), 0 1px 0 rgba(255, 255, 255, .1);
box-shadow: inset 0 1px 3px rgba(0, 0, 0, .05), 0 1px 0 rgba(255, 255, 255, .1);
}
/*# sourceMappingURL=bootstrap-theme.css.map */

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,229 +0,0 @@
<?xml version="1.0" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" >
<svg xmlns="http://www.w3.org/2000/svg">
<metadata></metadata>
<defs>
<font id="glyphicons_halflingsregular" horiz-adv-x="1200" >
<font-face units-per-em="1200" ascent="960" descent="-240" />
<missing-glyph horiz-adv-x="500" />
<glyph />
<glyph />
<glyph unicode="&#xd;" />
<glyph unicode=" " />
<glyph unicode="*" d="M100 500v200h259l-183 183l141 141l183 -183v259h200v-259l183 183l141 -141l-183 -183h259v-200h-259l183 -183l-141 -141l-183 183v-259h-200v259l-183 -183l-141 141l183 183h-259z" />
<glyph unicode="+" d="M0 400v300h400v400h300v-400h400v-300h-400v-400h-300v400h-400z" />
<glyph unicode="&#xa0;" />
<glyph unicode="&#x2000;" horiz-adv-x="652" />
<glyph unicode="&#x2001;" horiz-adv-x="1304" />
<glyph unicode="&#x2002;" horiz-adv-x="652" />
<glyph unicode="&#x2003;" horiz-adv-x="1304" />
<glyph unicode="&#x2004;" horiz-adv-x="434" />
<glyph unicode="&#x2005;" horiz-adv-x="326" />
<glyph unicode="&#x2006;" horiz-adv-x="217" />
<glyph unicode="&#x2007;" horiz-adv-x="217" />
<glyph unicode="&#x2008;" horiz-adv-x="163" />
<glyph unicode="&#x2009;" horiz-adv-x="260" />
<glyph unicode="&#x200a;" horiz-adv-x="72" />
<glyph unicode="&#x202f;" horiz-adv-x="260" />
<glyph unicode="&#x205f;" horiz-adv-x="326" />
<glyph unicode="&#x20ac;" d="M100 500l100 100h113q0 47 5 100h-218l100 100h135q37 167 112 257q117 141 297 141q242 0 354 -189q60 -103 66 -209h-181q0 55 -25.5 99t-63.5 68t-75 36.5t-67 12.5q-24 0 -52.5 -10t-62.5 -32t-65.5 -67t-50.5 -107h379l-100 -100h-300q-6 -46 -6 -100h406l-100 -100 h-300q9 -74 33 -132t52.5 -91t62 -54.5t59 -29t46.5 -7.5q29 0 66 13t75 37t63.5 67.5t25.5 96.5h174q-31 -172 -128 -278q-107 -117 -274 -117q-205 0 -324 158q-36 46 -69 131.5t-45 205.5h-217z" />
<glyph unicode="&#x2212;" d="M200 400h900v300h-900v-300z" />
<glyph unicode="&#x25fc;" horiz-adv-x="500" d="M0 0z" />
<glyph unicode="&#x2601;" d="M-14 494q0 -80 56.5 -137t135.5 -57h750q120 0 205 86.5t85 207.5t-85 207t-205 86q-46 0 -90 -14q-44 97 -134.5 156.5t-200.5 59.5q-152 0 -260 -107.5t-108 -260.5q0 -25 2 -37q-66 -14 -108.5 -67.5t-42.5 -122.5z" />
<glyph unicode="&#x2709;" d="M0 100l400 400l200 -200l200 200l400 -400h-1200zM0 300v600l300 -300zM0 1100l600 -603l600 603h-1200zM900 600l300 300v-600z" />
<glyph unicode="&#x270f;" d="M-13 -13l333 112l-223 223zM187 403l214 -214l614 614l-214 214zM887 1103l214 -214l99 92q13 13 13 32.5t-13 33.5l-153 153q-15 13 -33 13t-33 -13z" />
<glyph unicode="&#xe001;" d="M0 1200h1200l-500 -550v-550h300v-100h-800v100h300v550z" />
<glyph unicode="&#xe002;" d="M14 84q18 -55 86 -75.5t147 5.5q65 21 109 69t44 90v606l600 155v-521q-64 16 -138 -7q-79 -26 -122.5 -83t-25.5 -111q18 -55 86 -75.5t147 4.5q70 23 111.5 63.5t41.5 95.5v881q0 10 -7 15.5t-17 2.5l-752 -193q-10 -3 -17 -12.5t-7 -19.5v-689q-64 17 -138 -7 q-79 -25 -122.5 -82t-25.5 -112z" />
<glyph unicode="&#xe003;" d="M23 693q0 200 142 342t342 142t342 -142t142 -342q0 -142 -78 -261l300 -300q7 -8 7 -18t-7 -18l-109 -109q-8 -7 -18 -7t-18 7l-300 300q-119 -78 -261 -78q-200 0 -342 142t-142 342zM176 693q0 -136 97 -233t234 -97t233.5 96.5t96.5 233.5t-96.5 233.5t-233.5 96.5 t-234 -97t-97 -233z" />
<glyph unicode="&#xe005;" d="M100 784q0 64 28 123t73 100.5t104.5 64t119 20.5t120 -38.5t104.5 -104.5q48 69 109.5 105t121.5 38t118.5 -20.5t102.5 -64t71 -100.5t27 -123q0 -57 -33.5 -117.5t-94 -124.5t-126.5 -127.5t-150 -152.5t-146 -174q-62 85 -145.5 174t-149.5 152.5t-126.5 127.5 t-94 124.5t-33.5 117.5z" />
<glyph unicode="&#xe006;" d="M-72 800h479l146 400h2l146 -400h472l-382 -278l145 -449l-384 275l-382 -275l146 447zM168 71l2 1z" />
<glyph unicode="&#xe007;" d="M-72 800h479l146 400h2l146 -400h472l-382 -278l145 -449l-384 275l-382 -275l146 447zM168 71l2 1zM237 700l196 -142l-73 -226l192 140l195 -141l-74 229l193 140h-235l-77 211l-78 -211h-239z" />
<glyph unicode="&#xe008;" d="M0 0v143l400 257v100q-37 0 -68.5 74.5t-31.5 125.5v200q0 124 88 212t212 88t212 -88t88 -212v-200q0 -51 -31.5 -125.5t-68.5 -74.5v-100l400 -257v-143h-1200z" />
<glyph unicode="&#xe009;" d="M0 0v1100h1200v-1100h-1200zM100 100h100v100h-100v-100zM100 300h100v100h-100v-100zM100 500h100v100h-100v-100zM100 700h100v100h-100v-100zM100 900h100v100h-100v-100zM300 100h600v400h-600v-400zM300 600h600v400h-600v-400zM1000 100h100v100h-100v-100z M1000 300h100v100h-100v-100zM1000 500h100v100h-100v-100zM1000 700h100v100h-100v-100zM1000 900h100v100h-100v-100z" />
<glyph unicode="&#xe010;" d="M0 50v400q0 21 14.5 35.5t35.5 14.5h400q21 0 35.5 -14.5t14.5 -35.5v-400q0 -21 -14.5 -35.5t-35.5 -14.5h-400q-21 0 -35.5 14.5t-14.5 35.5zM0 650v400q0 21 14.5 35.5t35.5 14.5h400q21 0 35.5 -14.5t14.5 -35.5v-400q0 -21 -14.5 -35.5t-35.5 -14.5h-400 q-21 0 -35.5 14.5t-14.5 35.5zM600 50v400q0 21 14.5 35.5t35.5 14.5h400q21 0 35.5 -14.5t14.5 -35.5v-400q0 -21 -14.5 -35.5t-35.5 -14.5h-400q-21 0 -35.5 14.5t-14.5 35.5zM600 650v400q0 21 14.5 35.5t35.5 14.5h400q21 0 35.5 -14.5t14.5 -35.5v-400 q0 -21 -14.5 -35.5t-35.5 -14.5h-400q-21 0 -35.5 14.5t-14.5 35.5z" />
<glyph unicode="&#xe011;" d="M0 50v200q0 21 14.5 35.5t35.5 14.5h200q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5zM0 450v200q0 21 14.5 35.5t35.5 14.5h200q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-200 q-21 0 -35.5 14.5t-14.5 35.5zM0 850v200q0 21 14.5 35.5t35.5 14.5h200q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5zM400 50v200q0 21 14.5 35.5t35.5 14.5h200q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5 t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5zM400 450v200q0 21 14.5 35.5t35.5 14.5h200q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5zM400 850v200q0 21 14.5 35.5t35.5 14.5h200q21 0 35.5 -14.5t14.5 -35.5 v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5zM800 50v200q0 21 14.5 35.5t35.5 14.5h200q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5zM800 450v200q0 21 14.5 35.5t35.5 14.5h200 q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5zM800 850v200q0 21 14.5 35.5t35.5 14.5h200q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5z" />
<glyph unicode="&#xe012;" d="M0 50v200q0 21 14.5 35.5t35.5 14.5h200q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5zM0 450q0 -21 14.5 -35.5t35.5 -14.5h200q21 0 35.5 14.5t14.5 35.5v200q0 21 -14.5 35.5t-35.5 14.5h-200q-21 0 -35.5 -14.5 t-14.5 -35.5v-200zM0 850v200q0 21 14.5 35.5t35.5 14.5h200q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5zM400 50v200q0 21 14.5 35.5t35.5 14.5h700q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5 t-35.5 -14.5h-700q-21 0 -35.5 14.5t-14.5 35.5zM400 450v200q0 21 14.5 35.5t35.5 14.5h700q21 0 35.5 -14.5t14.5 -35.5v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-700q-21 0 -35.5 14.5t-14.5 35.5zM400 850v200q0 21 14.5 35.5t35.5 14.5h700q21 0 35.5 -14.5t14.5 -35.5 v-200q0 -21 -14.5 -35.5t-35.5 -14.5h-700q-21 0 -35.5 14.5t-14.5 35.5z" />
<glyph unicode="&#xe013;" d="M29 454l419 -420l818 820l-212 212l-607 -607l-206 207z" />
<glyph unicode="&#xe014;" d="M106 318l282 282l-282 282l212 212l282 -282l282 282l212 -212l-282 -282l282 -282l-212 -212l-282 282l-282 -282z" />
<glyph unicode="&#xe015;" d="M23 693q0 200 142 342t342 142t342 -142t142 -342q0 -142 -78 -261l300 -300q7 -8 7 -18t-7 -18l-109 -109q-8 -7 -18 -7t-18 7l-300 300q-119 -78 -261 -78q-200 0 -342 142t-142 342zM176 693q0 -136 97 -233t234 -97t233.5 96.5t96.5 233.5t-96.5 233.5t-233.5 96.5 t-234 -97t-97 -233zM300 600v200h100v100h200v-100h100v-200h-100v-100h-200v100h-100z" />
<glyph unicode="&#xe016;" d="M23 694q0 200 142 342t342 142t342 -142t142 -342q0 -141 -78 -262l300 -299q7 -7 7 -18t-7 -18l-109 -109q-8 -8 -18 -8t-18 8l-300 300q-119 -78 -261 -78q-200 0 -342 142t-142 342zM176 694q0 -136 97 -233t234 -97t233.5 97t96.5 233t-96.5 233t-233.5 97t-234 -97 t-97 -233zM300 601h400v200h-400v-200z" />
<glyph unicode="&#xe017;" d="M23 600q0 183 105 331t272 210v-166q-103 -55 -165 -155t-62 -220q0 -177 125 -302t302 -125t302 125t125 302q0 120 -62 220t-165 155v166q167 -62 272 -210t105 -331q0 -118 -45.5 -224.5t-123 -184t-184 -123t-224.5 -45.5t-224.5 45.5t-184 123t-123 184t-45.5 224.5 zM500 750q0 -21 14.5 -35.5t35.5 -14.5h100q21 0 35.5 14.5t14.5 35.5v400q0 21 -14.5 35.5t-35.5 14.5h-100q-21 0 -35.5 -14.5t-14.5 -35.5v-400z" />
<glyph unicode="&#xe018;" d="M100 1h200v300h-200v-300zM400 1v500h200v-500h-200zM700 1v800h200v-800h-200zM1000 1v1200h200v-1200h-200z" />
<glyph unicode="&#xe019;" d="M26 601q0 -33 6 -74l151 -38l2 -6q14 -49 38 -93l3 -5l-80 -134q45 -59 105 -105l133 81l5 -3q45 -26 94 -39l5 -2l38 -151q40 -5 74 -5q27 0 74 5l38 151l6 2q46 13 93 39l5 3l134 -81q56 44 104 105l-80 134l3 5q24 44 39 93l1 6l152 38q5 40 5 74q0 28 -5 73l-152 38 l-1 6q-16 51 -39 93l-3 5l80 134q-44 58 -104 105l-134 -81l-5 3q-45 25 -93 39l-6 1l-38 152q-40 5 -74 5q-27 0 -74 -5l-38 -152l-5 -1q-50 -14 -94 -39l-5 -3l-133 81q-59 -47 -105 -105l80 -134l-3 -5q-25 -47 -38 -93l-2 -6l-151 -38q-6 -48 -6 -73zM385 601 q0 88 63 151t152 63t152 -63t63 -151q0 -89 -63 -152t-152 -63t-152 63t-63 152z" />
<glyph unicode="&#xe020;" d="M100 1025v50q0 10 7.5 17.5t17.5 7.5h275v100q0 41 29.5 70.5t70.5 29.5h300q41 0 70.5 -29.5t29.5 -70.5v-100h275q10 0 17.5 -7.5t7.5 -17.5v-50q0 -11 -7 -18t-18 -7h-1050q-11 0 -18 7t-7 18zM200 100v800h900v-800q0 -41 -29.5 -71t-70.5 -30h-700q-41 0 -70.5 30 t-29.5 71zM300 100h100v700h-100v-700zM500 100h100v700h-100v-700zM500 1100h300v100h-300v-100zM700 100h100v700h-100v-700zM900 100h100v700h-100v-700z" />
<glyph unicode="&#xe021;" d="M1 601l656 644l644 -644h-200v-600h-300v400h-300v-400h-300v600h-200z" />
<glyph unicode="&#xe022;" d="M100 25v1150q0 11 7 18t18 7h475v-500h400v-675q0 -11 -7 -18t-18 -7h-850q-11 0 -18 7t-7 18zM700 800v300l300 -300h-300z" />
<glyph unicode="&#xe023;" d="M4 600q0 162 80 299t217 217t299 80t299 -80t217 -217t80 -299t-80 -299t-217 -217t-299 -80t-299 80t-217 217t-80 299zM186 600q0 -171 121.5 -292.5t292.5 -121.5t292.5 121.5t121.5 292.5t-121.5 292.5t-292.5 121.5t-292.5 -121.5t-121.5 -292.5zM500 500v400h100 v-300h200v-100h-300z" />
<glyph unicode="&#xe024;" d="M-100 0l431 1200h209l-21 -300h162l-20 300h208l431 -1200h-538l-41 400h-242l-40 -400h-539zM488 500h224l-27 300h-170z" />
<glyph unicode="&#xe025;" d="M0 0v400h490l-290 300h200v500h300v-500h200l-290 -300h490v-400h-1100zM813 200h175v100h-175v-100z" />
<glyph unicode="&#xe026;" d="M1 600q0 122 47.5 233t127.5 191t191 127.5t233 47.5t233 -47.5t191 -127.5t127.5 -191t47.5 -233t-47.5 -233t-127.5 -191t-191 -127.5t-233 -47.5t-233 47.5t-191 127.5t-127.5 191t-47.5 233zM188 600q0 -170 121 -291t291 -121t291 121t121 291t-121 291t-291 121 t-291 -121t-121 -291zM350 600h150v300h200v-300h150l-250 -300z" />
<glyph unicode="&#xe027;" d="M4 600q0 162 80 299t217 217t299 80t299 -80t217 -217t80 -299t-80 -299t-217 -217t-299 -80t-299 80t-217 217t-80 299zM186 600q0 -171 121.5 -292.5t292.5 -121.5t292.5 121.5t121.5 292.5t-121.5 292.5t-292.5 121.5t-292.5 -121.5t-121.5 -292.5zM350 600l250 300 l250 -300h-150v-300h-200v300h-150z" />
<glyph unicode="&#xe028;" d="M0 25v475l200 700h800l199 -700l1 -475q0 -11 -7 -18t-18 -7h-1150q-11 0 -18 7t-7 18zM200 500h200l50 -200h300l50 200h200l-97 500h-606z" />
<glyph unicode="&#xe029;" d="M4 600q0 162 80 299t217 217t299 80t299 -80t217 -217t80 -299t-80 -299t-217 -217t-299 -80t-299 80t-217 217t-80 299zM186 600q0 -172 121.5 -293t292.5 -121t292.5 121t121.5 293q0 171 -121.5 292.5t-292.5 121.5t-292.5 -121.5t-121.5 -292.5zM500 397v401 l297 -200z" />
<glyph unicode="&#xe030;" d="M23 600q0 -118 45.5 -224.5t123 -184t184 -123t224.5 -45.5t224.5 45.5t184 123t123 184t45.5 224.5h-150q0 -177 -125 -302t-302 -125t-302 125t-125 302t125 302t302 125q136 0 246 -81l-146 -146h400v400l-145 -145q-157 122 -355 122q-118 0 -224.5 -45.5t-184 -123 t-123 -184t-45.5 -224.5z" />
<glyph unicode="&#xe031;" d="M23 600q0 118 45.5 224.5t123 184t184 123t224.5 45.5q198 0 355 -122l145 145v-400h-400l147 147q-112 80 -247 80q-177 0 -302 -125t-125 -302h-150zM100 0v400h400l-147 -147q112 -80 247 -80q177 0 302 125t125 302h150q0 -118 -45.5 -224.5t-123 -184t-184 -123 t-224.5 -45.5q-198 0 -355 122z" />
<glyph unicode="&#xe032;" d="M100 0h1100v1200h-1100v-1200zM200 100v900h900v-900h-900zM300 200v100h100v-100h-100zM300 400v100h100v-100h-100zM300 600v100h100v-100h-100zM300 800v100h100v-100h-100zM500 200h500v100h-500v-100zM500 400v100h500v-100h-500zM500 600v100h500v-100h-500z M500 800v100h500v-100h-500z" />
<glyph unicode="&#xe033;" d="M0 100v600q0 41 29.5 70.5t70.5 29.5h100v200q0 82 59 141t141 59h300q82 0 141 -59t59 -141v-200h100q41 0 70.5 -29.5t29.5 -70.5v-600q0 -41 -29.5 -70.5t-70.5 -29.5h-900q-41 0 -70.5 29.5t-29.5 70.5zM400 800h300v150q0 21 -14.5 35.5t-35.5 14.5h-200 q-21 0 -35.5 -14.5t-14.5 -35.5v-150z" />
<glyph unicode="&#xe034;" d="M100 0v1100h100v-1100h-100zM300 400q60 60 127.5 84t127.5 17.5t122 -23t119 -30t110 -11t103 42t91 120.5v500q-40 -81 -101.5 -115.5t-127.5 -29.5t-138 25t-139.5 40t-125.5 25t-103 -29.5t-65 -115.5v-500z" />
<glyph unicode="&#xe035;" d="M0 275q0 -11 7 -18t18 -7h50q11 0 18 7t7 18v300q0 127 70.5 231.5t184.5 161.5t245 57t245 -57t184.5 -161.5t70.5 -231.5v-300q0 -11 7 -18t18 -7h50q11 0 18 7t7 18v300q0 116 -49.5 227t-131 192.5t-192.5 131t-227 49.5t-227 -49.5t-192.5 -131t-131 -192.5 t-49.5 -227v-300zM200 20v460q0 8 6 14t14 6h160q8 0 14 -6t6 -14v-460q0 -8 -6 -14t-14 -6h-160q-8 0 -14 6t-6 14zM800 20v460q0 8 6 14t14 6h160q8 0 14 -6t6 -14v-460q0 -8 -6 -14t-14 -6h-160q-8 0 -14 6t-6 14z" />
<glyph unicode="&#xe036;" d="M0 400h300l300 -200v800l-300 -200h-300v-400zM688 459l141 141l-141 141l71 71l141 -141l141 141l71 -71l-141 -141l141 -141l-71 -71l-141 141l-141 -141z" />
<glyph unicode="&#xe037;" d="M0 400h300l300 -200v800l-300 -200h-300v-400zM700 857l69 53q111 -135 111 -310q0 -169 -106 -302l-67 54q86 110 86 248q0 146 -93 257z" />
<glyph unicode="&#xe038;" d="M0 401v400h300l300 200v-800l-300 200h-300zM702 858l69 53q111 -135 111 -310q0 -170 -106 -303l-67 55q86 110 86 248q0 145 -93 257zM889 951l7 -8q123 -151 123 -344q0 -189 -119 -339l-7 -8l81 -66l6 8q142 178 142 405q0 230 -144 408l-6 8z" />
<glyph unicode="&#xe039;" d="M0 0h500v500h-200v100h-100v-100h-200v-500zM0 600h100v100h400v100h100v100h-100v300h-500v-600zM100 100v300h300v-300h-300zM100 800v300h300v-300h-300zM200 200v100h100v-100h-100zM200 900h100v100h-100v-100zM500 500v100h300v-300h200v-100h-100v-100h-200v100 h-100v100h100v200h-200zM600 0v100h100v-100h-100zM600 1000h100v-300h200v-300h300v200h-200v100h200v500h-600v-200zM800 800v300h300v-300h-300zM900 0v100h300v-100h-300zM900 900v100h100v-100h-100zM1100 200v100h100v-100h-100z" />
<glyph unicode="&#xe040;" d="M0 200h100v1000h-100v-1000zM100 0v100h300v-100h-300zM200 200v1000h100v-1000h-100zM500 0v91h100v-91h-100zM500 200v1000h200v-1000h-200zM700 0v91h100v-91h-100zM800 200v1000h100v-1000h-100zM900 0v91h200v-91h-200zM1000 200v1000h200v-1000h-200z" />
<glyph unicode="&#xe041;" d="M0 700l1 475q0 10 7.5 17.5t17.5 7.5h474l700 -700l-500 -500zM148 953q0 -42 29 -71q30 -30 71.5 -30t71.5 30q29 29 29 71t-29 71q-30 30 -71.5 30t-71.5 -30q-29 -29 -29 -71z" />
<glyph unicode="&#xe042;" d="M1 700l1 475q0 11 7 18t18 7h474l700 -700l-500 -500zM148 953q0 -42 30 -71q29 -30 71 -30t71 30q30 29 30 71t-30 71q-29 30 -71 30t-71 -30q-30 -29 -30 -71zM701 1200h100l700 -700l-500 -500l-50 50l450 450z" />
<glyph unicode="&#xe043;" d="M100 0v1025l175 175h925v-1000l-100 -100v1000h-750l-100 -100h750v-1000h-900z" />
<glyph unicode="&#xe044;" d="M200 0l450 444l450 -443v1150q0 20 -14.5 35t-35.5 15h-800q-21 0 -35.5 -15t-14.5 -35v-1151z" />
<glyph unicode="&#xe045;" d="M0 100v700h200l100 -200h600l100 200h200v-700h-200v200h-800v-200h-200zM253 829l40 -124h592l62 124l-94 346q-2 11 -10 18t-18 7h-450q-10 0 -18 -7t-10 -18zM281 24l38 152q2 10 11.5 17t19.5 7h500q10 0 19.5 -7t11.5 -17l38 -152q2 -10 -3.5 -17t-15.5 -7h-600 q-10 0 -15.5 7t-3.5 17z" />
<glyph unicode="&#xe046;" d="M0 200q0 -41 29.5 -70.5t70.5 -29.5h1000q41 0 70.5 29.5t29.5 70.5v600q0 41 -29.5 70.5t-70.5 29.5h-150q-4 8 -11.5 21.5t-33 48t-53 61t-69 48t-83.5 21.5h-200q-41 0 -82 -20.5t-70 -50t-52 -59t-34 -50.5l-12 -20h-150q-41 0 -70.5 -29.5t-29.5 -70.5v-600z M356 500q0 100 72 172t172 72t172 -72t72 -172t-72 -172t-172 -72t-172 72t-72 172zM494 500q0 -44 31 -75t75 -31t75 31t31 75t-31 75t-75 31t-75 -31t-31 -75zM900 700v100h100v-100h-100z" />
<glyph unicode="&#xe047;" d="M53 0h365v66q-41 0 -72 11t-49 38t1 71l92 234h391l82 -222q16 -45 -5.5 -88.5t-74.5 -43.5v-66h417v66q-34 1 -74 43q-18 19 -33 42t-21 37l-6 13l-385 998h-93l-399 -1006q-24 -48 -52 -75q-12 -12 -33 -25t-36 -20l-15 -7v-66zM416 521l178 457l46 -140l116 -317h-340 z" />
<glyph unicode="&#xe048;" d="M100 0v89q41 7 70.5 32.5t29.5 65.5v827q0 28 -1 39.5t-5.5 26t-15.5 21t-29 14t-49 14.5v71l471 -1q120 0 213 -88t93 -228q0 -55 -11.5 -101.5t-28 -74t-33.5 -47.5t-28 -28l-12 -7q8 -3 21.5 -9t48 -31.5t60.5 -58t47.5 -91.5t21.5 -129q0 -84 -59 -156.5t-142 -111 t-162 -38.5h-500zM400 200h161q89 0 153 48.5t64 132.5q0 90 -62.5 154.5t-156.5 64.5h-159v-400zM400 700h139q76 0 130 61.5t54 138.5q0 82 -84 130.5t-239 48.5v-379z" />
<glyph unicode="&#xe049;" d="M200 0v57q77 7 134.5 40.5t65.5 80.5l173 849q10 56 -10 74t-91 37q-6 1 -10.5 2.5t-9.5 2.5v57h425l2 -57q-33 -8 -62 -25.5t-46 -37t-29.5 -38t-17.5 -30.5l-5 -12l-128 -825q-10 -52 14 -82t95 -36v-57h-500z" />
<glyph unicode="&#xe050;" d="M-75 200h75v800h-75l125 167l125 -167h-75v-800h75l-125 -167zM300 900v300h150h700h150v-300h-50q0 29 -8 48.5t-18.5 30t-33.5 15t-39.5 5.5t-50.5 1h-200v-850l100 -50v-100h-400v100l100 50v850h-200q-34 0 -50.5 -1t-40 -5.5t-33.5 -15t-18.5 -30t-8.5 -48.5h-49z " />
<glyph unicode="&#xe051;" d="M33 51l167 125v-75h800v75l167 -125l-167 -125v75h-800v-75zM100 901v300h150h700h150v-300h-50q0 29 -8 48.5t-18 30t-33.5 15t-40 5.5t-50.5 1h-200v-650l100 -50v-100h-400v100l100 50v650h-200q-34 0 -50.5 -1t-39.5 -5.5t-33.5 -15t-18.5 -30t-8 -48.5h-50z" />
<glyph unicode="&#xe052;" d="M0 50q0 -20 14.5 -35t35.5 -15h1100q21 0 35.5 15t14.5 35v100q0 21 -14.5 35.5t-35.5 14.5h-1100q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM0 350q0 -20 14.5 -35t35.5 -15h800q21 0 35.5 15t14.5 35v100q0 21 -14.5 35.5t-35.5 14.5h-800q-21 0 -35.5 -14.5t-14.5 -35.5 v-100zM0 650q0 -20 14.5 -35t35.5 -15h1000q21 0 35.5 15t14.5 35v100q0 21 -14.5 35.5t-35.5 14.5h-1000q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM0 950q0 -20 14.5 -35t35.5 -15h600q21 0 35.5 15t14.5 35v100q0 21 -14.5 35.5t-35.5 14.5h-600q-21 0 -35.5 -14.5 t-14.5 -35.5v-100z" />
<glyph unicode="&#xe053;" d="M0 50q0 -20 14.5 -35t35.5 -15h1100q21 0 35.5 15t14.5 35v100q0 21 -14.5 35.5t-35.5 14.5h-1100q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM0 650q0 -20 14.5 -35t35.5 -15h1100q21 0 35.5 15t14.5 35v100q0 21 -14.5 35.5t-35.5 14.5h-1100q-21 0 -35.5 -14.5t-14.5 -35.5 v-100zM200 350q0 -20 14.5 -35t35.5 -15h700q21 0 35.5 15t14.5 35v100q0 21 -14.5 35.5t-35.5 14.5h-700q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM200 950q0 -20 14.5 -35t35.5 -15h700q21 0 35.5 15t14.5 35v100q0 21 -14.5 35.5t-35.5 14.5h-700q-21 0 -35.5 -14.5 t-14.5 -35.5v-100z" />
<glyph unicode="&#xe054;" d="M0 50v100q0 21 14.5 35.5t35.5 14.5h1100q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-1100q-21 0 -35.5 15t-14.5 35zM100 650v100q0 21 14.5 35.5t35.5 14.5h1000q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-1000q-21 0 -35.5 15 t-14.5 35zM300 350v100q0 21 14.5 35.5t35.5 14.5h800q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-800q-21 0 -35.5 15t-14.5 35zM500 950v100q0 21 14.5 35.5t35.5 14.5h600q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-600 q-21 0 -35.5 15t-14.5 35z" />
<glyph unicode="&#xe055;" d="M0 50v100q0 21 14.5 35.5t35.5 14.5h1100q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-1100q-21 0 -35.5 15t-14.5 35zM0 350v100q0 21 14.5 35.5t35.5 14.5h1100q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-1100q-21 0 -35.5 15 t-14.5 35zM0 650v100q0 21 14.5 35.5t35.5 14.5h1100q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-1100q-21 0 -35.5 15t-14.5 35zM0 950v100q0 21 14.5 35.5t35.5 14.5h1100q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-1100 q-21 0 -35.5 15t-14.5 35z" />
<glyph unicode="&#xe056;" d="M0 50v100q0 21 14.5 35.5t35.5 14.5h100q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-100q-21 0 -35.5 15t-14.5 35zM0 350v100q0 21 14.5 35.5t35.5 14.5h100q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-100q-21 0 -35.5 15 t-14.5 35zM0 650v100q0 21 14.5 35.5t35.5 14.5h100q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-100q-21 0 -35.5 15t-14.5 35zM0 950v100q0 21 14.5 35.5t35.5 14.5h100q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-100q-21 0 -35.5 15 t-14.5 35zM300 50v100q0 21 14.5 35.5t35.5 14.5h800q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-800q-21 0 -35.5 15t-14.5 35zM300 350v100q0 21 14.5 35.5t35.5 14.5h800q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-800 q-21 0 -35.5 15t-14.5 35zM300 650v100q0 21 14.5 35.5t35.5 14.5h800q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15h-800q-21 0 -35.5 15t-14.5 35zM300 950v100q0 21 14.5 35.5t35.5 14.5h800q21 0 35.5 -14.5t14.5 -35.5v-100q0 -20 -14.5 -35t-35.5 -15 h-800q-21 0 -35.5 15t-14.5 35z" />
<glyph unicode="&#xe057;" d="M-101 500v100h201v75l166 -125l-166 -125v75h-201zM300 0h100v1100h-100v-1100zM500 50q0 -20 14.5 -35t35.5 -15h600q20 0 35 15t15 35v100q0 21 -15 35.5t-35 14.5h-600q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM500 350q0 -20 14.5 -35t35.5 -15h300q20 0 35 15t15 35 v100q0 21 -15 35.5t-35 14.5h-300q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM500 650q0 -20 14.5 -35t35.5 -15h500q20 0 35 15t15 35v100q0 21 -15 35.5t-35 14.5h-500q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM500 950q0 -20 14.5 -35t35.5 -15h100q20 0 35 15t15 35v100 q0 21 -15 35.5t-35 14.5h-100q-21 0 -35.5 -14.5t-14.5 -35.5v-100z" />
<glyph unicode="&#xe058;" d="M1 50q0 -20 14.5 -35t35.5 -15h600q20 0 35 15t15 35v100q0 21 -15 35.5t-35 14.5h-600q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM1 350q0 -20 14.5 -35t35.5 -15h300q20 0 35 15t15 35v100q0 21 -15 35.5t-35 14.5h-300q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM1 650 q0 -20 14.5 -35t35.5 -15h500q20 0 35 15t15 35v100q0 21 -15 35.5t-35 14.5h-500q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM1 950q0 -20 14.5 -35t35.5 -15h100q20 0 35 15t15 35v100q0 21 -15 35.5t-35 14.5h-100q-21 0 -35.5 -14.5t-14.5 -35.5v-100zM801 0v1100h100v-1100 h-100zM934 550l167 -125v75h200v100h-200v75z" />
<glyph unicode="&#xe059;" d="M0 275v650q0 31 22 53t53 22h750q31 0 53 -22t22 -53v-650q0 -31 -22 -53t-53 -22h-750q-31 0 -53 22t-22 53zM900 600l300 300v-600z" />
<glyph unicode="&#xe060;" d="M0 44v1012q0 18 13 31t31 13h1112q19 0 31.5 -13t12.5 -31v-1012q0 -18 -12.5 -31t-31.5 -13h-1112q-18 0 -31 13t-13 31zM100 263l247 182l298 -131l-74 156l293 318l236 -288v500h-1000v-737zM208 750q0 56 39 95t95 39t95 -39t39 -95t-39 -95t-95 -39t-95 39t-39 95z " />
<glyph unicode="&#xe062;" d="M148 745q0 124 60.5 231.5t165 172t226.5 64.5q123 0 227 -63t164.5 -169.5t60.5 -229.5t-73 -272q-73 -114 -166.5 -237t-150.5 -189l-57 -66q-10 9 -27 26t-66.5 70.5t-96 109t-104 135.5t-100.5 155q-63 139 -63 262zM342 772q0 -107 75.5 -182.5t181.5 -75.5 q107 0 182.5 75.5t75.5 182.5t-75.5 182t-182.5 75t-182 -75.5t-75 -181.5z" />
<glyph unicode="&#xe063;" d="M1 600q0 122 47.5 233t127.5 191t191 127.5t233 47.5t233 -47.5t191 -127.5t127.5 -191t47.5 -233t-47.5 -233t-127.5 -191t-191 -127.5t-233 -47.5t-233 47.5t-191 127.5t-127.5 191t-47.5 233zM173 600q0 -177 125.5 -302t301.5 -125v854q-176 0 -301.5 -125 t-125.5 -302z" />
<glyph unicode="&#xe064;" d="M117 406q0 94 34 186t88.5 172.5t112 159t115 177t87.5 194.5q21 -71 57.5 -142.5t76 -130.5t83 -118.5t82 -117t70 -116t50 -125.5t18.5 -136q0 -89 -39 -165.5t-102 -126.5t-140 -79.5t-156 -33.5q-114 6 -211.5 53t-161.5 139t-64 210zM243 414q14 -82 59.5 -136 t136.5 -80l16 98q-7 6 -18 17t-34 48t-33 77q-15 73 -14 143.5t10 122.5l9 51q-92 -110 -119.5 -185t-12.5 -156z" />
<glyph unicode="&#xe065;" d="M0 400v300q0 165 117.5 282.5t282.5 117.5q366 -6 397 -14l-186 -186h-311q-41 0 -70.5 -29.5t-29.5 -70.5v-500q0 -41 29.5 -70.5t70.5 -29.5h500q41 0 70.5 29.5t29.5 70.5v125l200 200v-225q0 -165 -117.5 -282.5t-282.5 -117.5h-300q-165 0 -282.5 117.5 t-117.5 282.5zM436 341l161 50l412 412l-114 113l-405 -405zM995 1015l113 -113l113 113l-21 85l-92 28z" />
<glyph unicode="&#xe066;" d="M0 400v300q0 165 117.5 282.5t282.5 117.5h261l2 -80q-133 -32 -218 -120h-145q-41 0 -70.5 -29.5t-29.5 -70.5v-500q0 -41 29.5 -70.5t70.5 -29.5h500q41 0 70.5 29.5t29.5 70.5l200 153v-53q0 -165 -117.5 -282.5t-282.5 -117.5h-300q-165 0 -282.5 117.5t-117.5 282.5 zM423 524q30 38 81.5 64t103 35.5t99 14t77.5 3.5l29 -1v-209l360 324l-359 318v-216q-7 0 -19 -1t-48 -8t-69.5 -18.5t-76.5 -37t-76.5 -59t-62 -88t-39.5 -121.5z" />
<glyph unicode="&#xe067;" d="M0 400v300q0 165 117.5 282.5t282.5 117.5h300q61 0 127 -23l-178 -177h-349q-41 0 -70.5 -29.5t-29.5 -70.5v-500q0 -41 29.5 -70.5t70.5 -29.5h500q41 0 70.5 29.5t29.5 70.5v69l200 200v-169q0 -165 -117.5 -282.5t-282.5 -117.5h-300q-165 0 -282.5 117.5 t-117.5 282.5zM342 632l283 -284l567 567l-137 137l-430 -431l-146 147z" />
<glyph unicode="&#xe068;" d="M0 603l300 296v-198h200v200h-200l300 300l295 -300h-195v-200h200v198l300 -296l-300 -300v198h-200v-200h195l-295 -300l-300 300h200v200h-200v-198z" />
<glyph unicode="&#xe069;" d="M200 50v1000q0 21 14.5 35.5t35.5 14.5h100q21 0 35.5 -14.5t14.5 -35.5v-437l500 487v-1100l-500 488v-438q0 -21 -14.5 -35.5t-35.5 -14.5h-100q-21 0 -35.5 14.5t-14.5 35.5z" />
<glyph unicode="&#xe070;" d="M0 50v1000q0 21 14.5 35.5t35.5 14.5h100q21 0 35.5 -14.5t14.5 -35.5v-437l500 487v-487l500 487v-1100l-500 488v-488l-500 488v-438q0 -21 -14.5 -35.5t-35.5 -14.5h-100q-21 0 -35.5 14.5t-14.5 35.5z" />
<glyph unicode="&#xe071;" d="M136 550l564 550v-487l500 487v-1100l-500 488v-488z" />
<glyph unicode="&#xe072;" d="M200 0l900 550l-900 550v-1100z" />
<glyph unicode="&#xe073;" d="M200 150q0 -21 14.5 -35.5t35.5 -14.5h200q21 0 35.5 14.5t14.5 35.5v800q0 21 -14.5 35.5t-35.5 14.5h-200q-21 0 -35.5 -14.5t-14.5 -35.5v-800zM600 150q0 -21 14.5 -35.5t35.5 -14.5h200q21 0 35.5 14.5t14.5 35.5v800q0 21 -14.5 35.5t-35.5 14.5h-200 q-21 0 -35.5 -14.5t-14.5 -35.5v-800z" />
<glyph unicode="&#xe074;" d="M200 150q0 -20 14.5 -35t35.5 -15h800q21 0 35.5 15t14.5 35v800q0 21 -14.5 35.5t-35.5 14.5h-800q-21 0 -35.5 -14.5t-14.5 -35.5v-800z" />
<glyph unicode="&#xe075;" d="M0 0v1100l500 -487v487l564 -550l-564 -550v488z" />
<glyph unicode="&#xe076;" d="M0 0v1100l500 -487v487l500 -487v437q0 21 14.5 35.5t35.5 14.5h100q21 0 35.5 -14.5t14.5 -35.5v-1000q0 -21 -14.5 -35.5t-35.5 -14.5h-100q-21 0 -35.5 14.5t-14.5 35.5v438l-500 -488v488z" />
<glyph unicode="&#xe077;" d="M300 0v1100l500 -487v437q0 21 14.5 35.5t35.5 14.5h100q21 0 35.5 -14.5t14.5 -35.5v-1000q0 -21 -14.5 -35.5t-35.5 -14.5h-100q-21 0 -35.5 14.5t-14.5 35.5v438z" />
<glyph unicode="&#xe078;" d="M100 250v100q0 21 14.5 35.5t35.5 14.5h1000q21 0 35.5 -14.5t14.5 -35.5v-100q0 -21 -14.5 -35.5t-35.5 -14.5h-1000q-21 0 -35.5 14.5t-14.5 35.5zM100 500h1100l-550 564z" />
<glyph unicode="&#xe079;" d="M185 599l592 -592l240 240l-353 353l353 353l-240 240z" />
<glyph unicode="&#xe080;" d="M272 194l353 353l-353 353l241 240l572 -571l21 -22l-1 -1v-1l-592 -591z" />
<glyph unicode="&#xe081;" d="M3 600q0 162 80 299.5t217.5 217.5t299.5 80t299.5 -80t217.5 -217.5t80 -299.5t-80 -299.5t-217.5 -217.5t-299.5 -80t-299.5 80t-217.5 217.5t-80 299.5zM300 500h200v-200h200v200h200v200h-200v200h-200v-200h-200v-200z" />
<glyph unicode="&#xe082;" d="M3 600q0 162 80 299.5t217.5 217.5t299.5 80t299.5 -80t217.5 -217.5t80 -299.5t-80 -299.5t-217.5 -217.5t-299.5 -80t-299.5 80t-217.5 217.5t-80 299.5zM300 500h600v200h-600v-200z" />
<glyph unicode="&#xe083;" d="M3 600q0 162 80 299.5t217.5 217.5t299.5 80t299.5 -80t217.5 -217.5t80 -299.5t-80 -299.5t-217.5 -217.5t-299.5 -80t-299.5 80t-217.5 217.5t-80 299.5zM246 459l213 -213l141 142l141 -142l213 213l-142 141l142 141l-213 212l-141 -141l-141 142l-212 -213l141 -141 z" />
<glyph unicode="&#xe084;" d="M3 600q0 162 80 299.5t217.5 217.5t299.5 80t299.5 -80t217.5 -217.5t80 -299.5t-80 -299.5t-217.5 -217.5t-299.5 -80t-299.5 80t-217.5 217.5t-80 299.5zM270 551l276 -277l411 411l-175 174l-236 -236l-102 102z" />
<glyph unicode="&#xe085;" d="M3 600q0 162 80 299.5t217.5 217.5t299.5 80t299.5 -80t217.5 -217.5t80 -299.5t-80 -299.5t-217.5 -217.5t-299.5 -80t-299.5 80t-217.5 217.5t-80 299.5zM364 700h143q4 0 11.5 -1t11 -1t6.5 3t3 9t1 11t3.5 8.5t3.5 6t5.5 4t6.5 2.5t9 1.5t9 0.5h11.5h12.5 q19 0 30 -10t11 -26q0 -22 -4 -28t-27 -22q-5 -1 -12.5 -3t-27 -13.5t-34 -27t-26.5 -46t-11 -68.5h200q5 3 14 8t31.5 25.5t39.5 45.5t31 69t14 94q0 51 -17.5 89t-42 58t-58.5 32t-58.5 15t-51.5 3q-50 0 -90.5 -12t-75 -38.5t-53.5 -74.5t-19 -114zM500 300h200v100h-200 v-100z" />
<glyph unicode="&#xe086;" d="M3 600q0 162 80 299.5t217.5 217.5t299.5 80t299.5 -80t217.5 -217.5t80 -299.5t-80 -299.5t-217.5 -217.5t-299.5 -80t-299.5 80t-217.5 217.5t-80 299.5zM400 300h400v100h-100v300h-300v-100h100v-200h-100v-100zM500 800h200v100h-200v-100z" />
<glyph unicode="&#xe087;" d="M0 500v200h195q31 125 98.5 199.5t206.5 100.5v200h200v-200q54 -20 113 -60t112.5 -105.5t71.5 -134.5h203v-200h-203q-25 -102 -116.5 -186t-180.5 -117v-197h-200v197q-140 27 -208 102.5t-98 200.5h-194zM290 500q24 -73 79.5 -127.5t130.5 -78.5v206h200v-206 q149 48 201 206h-201v200h200q-25 74 -75.5 127t-124.5 77v-204h-200v203q-75 -23 -130 -77t-79 -126h209v-200h-210z" />
<glyph unicode="&#xe088;" d="M4 600q0 162 80 299t217 217t299 80t299 -80t217 -217t80 -299t-80 -299t-217 -217t-299 -80t-299 80t-217 217t-80 299zM186 600q0 -171 121.5 -292.5t292.5 -121.5t292.5 121.5t121.5 292.5t-121.5 292.5t-292.5 121.5t-292.5 -121.5t-121.5 -292.5zM356 465l135 135 l-135 135l109 109l135 -135l135 135l109 -109l-135 -135l135 -135l-109 -109l-135 135l-135 -135z" />
<glyph unicode="&#xe089;" d="M4 600q0 162 80 299t217 217t299 80t299 -80t217 -217t80 -299t-80 -299t-217 -217t-299 -80t-299 80t-217 217t-80 299zM186 600q0 -171 121.5 -292.5t292.5 -121.5t292.5 121.5t121.5 292.5t-121.5 292.5t-292.5 121.5t-292.5 -121.5t-121.5 -292.5zM322 537l141 141 l87 -87l204 205l142 -142l-346 -345z" />
<glyph unicode="&#xe090;" d="M4 600q0 162 80 299t217 217t299 80t299 -80t217 -217t80 -299t-80 -299t-217 -217t-299 -80t-299 80t-217 217t-80 299zM186 600q0 -115 62 -215l568 567q-100 62 -216 62q-171 0 -292.5 -121.5t-121.5 -292.5zM391 245q97 -59 209 -59q171 0 292.5 121.5t121.5 292.5 q0 112 -59 209z" />
<glyph unicode="&#xe091;" d="M0 547l600 453v-300h600v-300h-600v-301z" />
<glyph unicode="&#xe092;" d="M0 400v300h600v300l600 -453l-600 -448v301h-600z" />
<glyph unicode="&#xe093;" d="M204 600l450 600l444 -600h-298v-600h-300v600h-296z" />
<glyph unicode="&#xe094;" d="M104 600h296v600h300v-600h298l-449 -600z" />
<glyph unicode="&#xe095;" d="M0 200q6 132 41 238.5t103.5 193t184 138t271.5 59.5v271l600 -453l-600 -448v301q-95 -2 -183 -20t-170 -52t-147 -92.5t-100 -135.5z" />
<glyph unicode="&#xe096;" d="M0 0v400l129 -129l294 294l142 -142l-294 -294l129 -129h-400zM635 777l142 -142l294 294l129 -129v400h-400l129 -129z" />
<glyph unicode="&#xe097;" d="M34 176l295 295l-129 129h400v-400l-129 130l-295 -295zM600 600v400l129 -129l295 295l142 -141l-295 -295l129 -130h-400z" />
<glyph unicode="&#xe101;" d="M23 600q0 118 45.5 224.5t123 184t184 123t224.5 45.5t224.5 -45.5t184 -123t123 -184t45.5 -224.5t-45.5 -224.5t-123 -184t-184 -123t-224.5 -45.5t-224.5 45.5t-184 123t-123 184t-45.5 224.5zM456 851l58 -302q4 -20 21.5 -34.5t37.5 -14.5h54q20 0 37.5 14.5 t21.5 34.5l58 302q4 20 -8 34.5t-32 14.5h-207q-21 0 -33 -14.5t-8 -34.5zM500 300h200v100h-200v-100z" />
<glyph unicode="&#xe102;" d="M0 800h100v-200h400v300h200v-300h400v200h100v100h-111q1 1 1 6.5t-1.5 15t-3.5 17.5l-34 172q-11 39 -41.5 63t-69.5 24q-32 0 -61 -17l-239 -144q-22 -13 -40 -35q-19 24 -40 36l-238 144q-33 18 -62 18q-39 0 -69.5 -23t-40.5 -61l-35 -177q-2 -8 -3 -18t-1 -15v-6 h-111v-100zM100 0h400v400h-400v-400zM200 900q-3 0 14 48t36 96l18 47l213 -191h-281zM700 0v400h400v-400h-400zM731 900l202 197q5 -12 12 -32.5t23 -64t25 -72t7 -28.5h-269z" />
<glyph unicode="&#xe103;" d="M0 -22v143l216 193q-9 53 -13 83t-5.5 94t9 113t38.5 114t74 124q47 60 99.5 102.5t103 68t127.5 48t145.5 37.5t184.5 43.5t220 58.5q0 -189 -22 -343t-59 -258t-89 -181.5t-108.5 -120t-122 -68t-125.5 -30t-121.5 -1.5t-107.5 12.5t-87.5 17t-56.5 7.5l-99 -55z M238.5 300.5q19.5 -6.5 86.5 76.5q55 66 367 234q70 38 118.5 69.5t102 79t99 111.5t86.5 148q22 50 24 60t-6 19q-7 5 -17 5t-26.5 -14.5t-33.5 -39.5q-35 -51 -113.5 -108.5t-139.5 -89.5l-61 -32q-369 -197 -458 -401q-48 -111 -28.5 -117.5z" />
<glyph unicode="&#xe104;" d="M111 408q0 -33 5 -63q9 -56 44 -119.5t105 -108.5q31 -21 64 -16t62 23.5t57 49.5t48 61.5t35 60.5q32 66 39 184.5t-13 157.5q79 -80 122 -164t26 -184q-5 -33 -20.5 -69.5t-37.5 -80.5q-10 -19 -14.5 -29t-12 -26t-9 -23.5t-3 -19t2.5 -15.5t11 -9.5t19.5 -5t30.5 2.5 t42 8q57 20 91 34t87.5 44.5t87 64t65.5 88.5t47 122q38 172 -44.5 341.5t-246.5 278.5q22 -44 43 -129q39 -159 -32 -154q-15 2 -33 9q-79 33 -120.5 100t-44 175.5t48.5 257.5q-13 -8 -34 -23.5t-72.5 -66.5t-88.5 -105.5t-60 -138t-8 -166.5q2 -12 8 -41.5t8 -43t6 -39.5 t3.5 -39.5t-1 -33.5t-6 -31.5t-13.5 -24t-21 -20.5t-31 -12q-38 -10 -67 13t-40.5 61.5t-15 81.5t10.5 75q-52 -46 -83.5 -101t-39 -107t-7.5 -85z" />
<glyph unicode="&#xe105;" d="M-61 600l26 40q6 10 20 30t49 63.5t74.5 85.5t97 90t116.5 83.5t132.5 59t145.5 23.5t145.5 -23.5t132.5 -59t116.5 -83.5t97 -90t74.5 -85.5t49 -63.5t20 -30l26 -40l-26 -40q-6 -10 -20 -30t-49 -63.5t-74.5 -85.5t-97 -90t-116.5 -83.5t-132.5 -59t-145.5 -23.5 t-145.5 23.5t-132.5 59t-116.5 83.5t-97 90t-74.5 85.5t-49 63.5t-20 30zM120 600q7 -10 40.5 -58t56 -78.5t68 -77.5t87.5 -75t103 -49.5t125 -21.5t123.5 20t100.5 45.5t85.5 71.5t66.5 75.5t58 81.5t47 66q-1 1 -28.5 37.5t-42 55t-43.5 53t-57.5 63.5t-58.5 54 q49 -74 49 -163q0 -124 -88 -212t-212 -88t-212 88t-88 212q0 85 46 158q-102 -87 -226 -258zM377 656q49 -124 154 -191l105 105q-37 24 -75 72t-57 84l-20 36z" />
<glyph unicode="&#xe106;" d="M-61 600l26 40q6 10 20 30t49 63.5t74.5 85.5t97 90t116.5 83.5t132.5 59t145.5 23.5q61 0 121 -17l37 142h148l-314 -1200h-148l37 143q-82 21 -165 71.5t-140 102t-109.5 112t-72 88.5t-29.5 43zM120 600q210 -282 393 -336l37 141q-107 18 -178.5 101.5t-71.5 193.5 q0 85 46 158q-102 -87 -226 -258zM377 656q49 -124 154 -191l47 47l23 87q-30 28 -59 69t-44 68l-14 26zM780 161l38 145q22 15 44.5 34t46 44t40.5 44t41 50.5t33.5 43.5t33 44t24.5 34q-97 127 -140 175l39 146q67 -54 131.5 -125.5t87.5 -103.5t36 -52l26 -40l-26 -40 q-7 -12 -25.5 -38t-63.5 -79.5t-95.5 -102.5t-124 -100t-146.5 -79z" />
<glyph unicode="&#xe107;" d="M-97.5 34q13.5 -34 50.5 -34h1294q37 0 50.5 35.5t-7.5 67.5l-642 1056q-20 34 -48 36.5t-48 -29.5l-642 -1066q-21 -32 -7.5 -66zM155 200l445 723l445 -723h-345v100h-200v-100h-345zM500 600l100 -300l100 300v100h-200v-100z" />
<glyph unicode="&#xe108;" d="M100 262v41q0 20 11 44.5t26 38.5l363 325v339q0 62 44 106t106 44t106 -44t44 -106v-339l363 -325q15 -14 26 -38.5t11 -44.5v-41q0 -20 -12 -26.5t-29 5.5l-359 249v-263q100 -91 100 -113v-64q0 -20 -13 -28.5t-32 0.5l-94 78h-222l-94 -78q-19 -9 -32 -0.5t-13 28.5 v64q0 22 100 113v263l-359 -249q-17 -12 -29 -5.5t-12 26.5z" />
<glyph unicode="&#xe109;" d="M0 50q0 -20 14.5 -35t35.5 -15h1000q21 0 35.5 15t14.5 35v750h-1100v-750zM0 900h1100v150q0 21 -14.5 35.5t-35.5 14.5h-150v100h-100v-100h-500v100h-100v-100h-150q-21 0 -35.5 -14.5t-14.5 -35.5v-150zM100 100v100h100v-100h-100zM100 300v100h100v-100h-100z M100 500v100h100v-100h-100zM300 100v100h100v-100h-100zM300 300v100h100v-100h-100zM300 500v100h100v-100h-100zM500 100v100h100v-100h-100zM500 300v100h100v-100h-100zM500 500v100h100v-100h-100zM700 100v100h100v-100h-100zM700 300v100h100v-100h-100zM700 500 v100h100v-100h-100zM900 100v100h100v-100h-100zM900 300v100h100v-100h-100zM900 500v100h100v-100h-100z" />
<glyph unicode="&#xe110;" d="M0 200v200h259l600 600h241v198l300 -295l-300 -300v197h-159l-600 -600h-341zM0 800h259l122 -122l141 142l-181 180h-341v-200zM678 381l141 142l122 -123h159v198l300 -295l-300 -300v197h-241z" />
<glyph unicode="&#xe111;" d="M0 400v600q0 41 29.5 70.5t70.5 29.5h1000q41 0 70.5 -29.5t29.5 -70.5v-600q0 -41 -29.5 -70.5t-70.5 -29.5h-596l-304 -300v300h-100q-41 0 -70.5 29.5t-29.5 70.5z" />
<glyph unicode="&#xe112;" d="M100 600v200h300v-250q0 -113 6 -145q17 -92 102 -117q39 -11 92 -11q37 0 66.5 5.5t50 15.5t36 24t24 31.5t14 37.5t7 42t2.5 45t0 47v25v250h300v-200q0 -42 -3 -83t-15 -104t-31.5 -116t-58 -109.5t-89 -96.5t-129 -65.5t-174.5 -25.5t-174.5 25.5t-129 65.5t-89 96.5 t-58 109.5t-31.5 116t-15 104t-3 83zM100 900v300h300v-300h-300zM800 900v300h300v-300h-300z" />
<glyph unicode="&#xe113;" d="M-30 411l227 -227l352 353l353 -353l226 227l-578 579z" />
<glyph unicode="&#xe114;" d="M70 797l580 -579l578 579l-226 227l-353 -353l-352 353z" />
<glyph unicode="&#xe115;" d="M-198 700l299 283l300 -283h-203v-400h385l215 -200h-800v600h-196zM402 1000l215 -200h381v-400h-198l299 -283l299 283h-200v600h-796z" />
<glyph unicode="&#xe116;" d="M18 939q-5 24 10 42q14 19 39 19h896l38 162q5 17 18.5 27.5t30.5 10.5h94q20 0 35 -14.5t15 -35.5t-15 -35.5t-35 -14.5h-54l-201 -961q-2 -4 -6 -10.5t-19 -17.5t-33 -11h-31v-50q0 -20 -14.5 -35t-35.5 -15t-35.5 15t-14.5 35v50h-300v-50q0 -20 -14.5 -35t-35.5 -15 t-35.5 15t-14.5 35v50h-50q-21 0 -35.5 15t-14.5 35q0 21 14.5 35.5t35.5 14.5h535l48 200h-633q-32 0 -54.5 21t-27.5 43z" />
<glyph unicode="&#xe117;" d="M0 0v800h1200v-800h-1200zM0 900v100h200q0 41 29.5 70.5t70.5 29.5h300q41 0 70.5 -29.5t29.5 -70.5h500v-100h-1200z" />
<glyph unicode="&#xe118;" d="M1 0l300 700h1200l-300 -700h-1200zM1 400v600h200q0 41 29.5 70.5t70.5 29.5h300q41 0 70.5 -29.5t29.5 -70.5h500v-200h-1000z" />
<glyph unicode="&#xe119;" d="M302 300h198v600h-198l298 300l298 -300h-198v-600h198l-298 -300z" />
<glyph unicode="&#xe120;" d="M0 600l300 298v-198h600v198l300 -298l-300 -297v197h-600v-197z" />
<glyph unicode="&#xe121;" d="M0 100v100q0 41 29.5 70.5t70.5 29.5h1000q41 0 70.5 -29.5t29.5 -70.5v-100q0 -41 -29.5 -70.5t-70.5 -29.5h-1000q-41 0 -70.5 29.5t-29.5 70.5zM31 400l172 739q5 22 23 41.5t38 19.5h672q19 0 37.5 -22.5t23.5 -45.5l172 -732h-1138zM800 100h100v100h-100v-100z M1000 100h100v100h-100v-100z" />
<glyph unicode="&#xe122;" d="M-101 600v50q0 24 25 49t50 38l25 13v-250l-11 5.5t-24 14t-30 21.5t-24 27.5t-11 31.5zM100 500v250v8v8v7t0.5 7t1.5 5.5t2 5t3 4t4.5 3.5t6 1.5t7.5 0.5h200l675 250v-850l-675 200h-38l47 -276q2 -12 -3 -17.5t-11 -6t-21 -0.5h-8h-83q-20 0 -34.5 14t-18.5 35 q-55 337 -55 351zM1100 200v850q0 21 14.5 35.5t35.5 14.5q20 0 35 -14.5t15 -35.5v-850q0 -20 -15 -35t-35 -15q-21 0 -35.5 15t-14.5 35z" />
<glyph unicode="&#xe123;" d="M74 350q0 21 13.5 35.5t33.5 14.5h18l117 173l63 327q15 77 76 140t144 83l-18 32q-6 19 3 32t29 13h94q20 0 29 -10.5t3 -29.5q-18 -36 -18 -37q83 -19 144 -82.5t76 -140.5l63 -327l118 -173h17q20 0 33.5 -14.5t13.5 -35.5q0 -20 -13 -40t-31 -27q-8 -3 -23 -8.5 t-65 -20t-103 -25t-132.5 -19.5t-158.5 -9q-125 0 -245.5 20.5t-178.5 40.5l-58 20q-18 7 -31 27.5t-13 40.5zM497 110q12 -49 40 -79.5t63 -30.5t63 30.5t39 79.5q-48 -6 -102 -6t-103 6z" />
<glyph unicode="&#xe124;" d="M21 445l233 -45l-78 -224l224 78l45 -233l155 179l155 -179l45 233l224 -78l-78 224l234 45l-180 155l180 156l-234 44l78 225l-224 -78l-45 233l-155 -180l-155 180l-45 -233l-224 78l78 -225l-233 -44l179 -156z" />
<glyph unicode="&#xe125;" d="M0 200h200v600h-200v-600zM300 275q0 -75 100 -75h61q124 -100 139 -100h250q46 0 83 57l238 344q29 31 29 74v100q0 44 -30.5 84.5t-69.5 40.5h-328q28 118 28 125v150q0 44 -30.5 84.5t-69.5 40.5h-50q-27 0 -51 -20t-38 -48l-96 -198l-145 -196q-20 -26 -20 -63v-400z M400 300v375l150 213l100 212h50v-175l-50 -225h450v-125l-250 -375h-214l-136 100h-100z" />
<glyph unicode="&#xe126;" d="M0 400v600h200v-600h-200zM300 525v400q0 75 100 75h61q124 100 139 100h250q46 0 83 -57l238 -344q29 -31 29 -74v-100q0 -44 -30.5 -84.5t-69.5 -40.5h-328q28 -118 28 -125v-150q0 -44 -30.5 -84.5t-69.5 -40.5h-50q-27 0 -51 20t-38 48l-96 198l-145 196 q-20 26 -20 63zM400 525l150 -212l100 -213h50v175l-50 225h450v125l-250 375h-214l-136 -100h-100v-375z" />
<glyph unicode="&#xe127;" d="M8 200v600h200v-600h-200zM308 275v525q0 17 14 35.5t28 28.5l14 9l362 230q14 6 25 6q17 0 29 -12l109 -112q14 -14 14 -34q0 -18 -11 -32l-85 -121h302q85 0 138.5 -38t53.5 -110t-54.5 -111t-138.5 -39h-107l-130 -339q-7 -22 -20.5 -41.5t-28.5 -19.5h-341 q-7 0 -90 81t-83 94zM408 289l100 -89h293l131 339q6 21 19.5 41t28.5 20h203q16 0 25 15t9 36q0 20 -9 34.5t-25 14.5h-457h-6.5h-7.5t-6.5 0.5t-6 1t-5 1.5t-5.5 2.5t-4 4t-4 5.5q-5 12 -5 20q0 14 10 27l147 183l-86 83l-339 -236v-503z" />
<glyph unicode="&#xe128;" d="M-101 651q0 72 54 110t139 38l302 -1l-85 121q-11 16 -11 32q0 21 14 34l109 113q13 12 29 12q11 0 25 -6l365 -230q7 -4 17 -10.5t26.5 -26t16.5 -36.5v-526q0 -13 -86 -93.5t-94 -80.5h-341q-16 0 -29.5 20t-19.5 41l-130 339h-107q-84 0 -139 39t-55 111zM-1 601h222 q15 0 28.5 -20.5t19.5 -40.5l131 -339h293l107 89v502l-343 237l-87 -83l145 -184q10 -11 10 -26q0 -11 -5 -20q-1 -3 -3.5 -5.5l-4 -4t-5 -2.5t-5.5 -1.5t-6.5 -1t-6.5 -0.5h-7.5h-6.5h-476v-100zM1000 201v600h200v-600h-200z" />
<glyph unicode="&#xe129;" d="M97 719l230 -363q4 -6 10.5 -15.5t26 -25t36.5 -15.5h525q13 0 94 83t81 90v342q0 15 -20 28.5t-41 19.5l-339 131v106q0 84 -39 139t-111 55t-110 -53.5t-38 -138.5v-302l-121 84q-15 12 -33.5 11.5t-32.5 -13.5l-112 -110q-22 -22 -6 -53zM172 739l83 86l183 -146 q22 -18 47 -5q3 1 5.5 3.5l4 4t2.5 5t1.5 5.5t1 6.5t0.5 6.5v7.5v6.5v456q0 22 25 31t50 -0.5t25 -30.5v-202q0 -16 20 -29.5t41 -19.5l339 -130v-294l-89 -100h-503zM400 0v200h600v-200h-600z" />
<glyph unicode="&#xe130;" d="M2 585q-16 -31 6 -53l112 -110q13 -13 32 -13.5t34 10.5l121 85q0 -51 -0.5 -153.5t-0.5 -148.5q0 -84 38.5 -138t110.5 -54t111 55t39 139v106l339 131q20 6 40.5 19.5t20.5 28.5v342q0 7 -81 90t-94 83h-525q-17 0 -35.5 -14t-28.5 -28l-10 -15zM77 565l236 339h503 l89 -100v-294l-340 -130q-20 -6 -40 -20t-20 -29v-202q0 -22 -25 -31t-50 0t-25 31v456v14.5t-1.5 11.5t-5 12t-9.5 7q-24 13 -46 -5l-184 -146zM305 1104v200h600v-200h-600z" />
<glyph unicode="&#xe131;" d="M5 597q0 122 47.5 232.5t127.5 190.5t190.5 127.5t232.5 47.5q162 0 299.5 -80t217.5 -218t80 -300t-80 -299.5t-217.5 -217.5t-299.5 -80t-300 80t-218 217.5t-80 299.5zM298 701l2 -201h300l-2 -194l402 294l-402 298v-197h-300z" />
<glyph unicode="&#xe132;" d="M0 597q0 122 47.5 232.5t127.5 190.5t190.5 127.5t231.5 47.5q122 0 232.5 -47.5t190.5 -127.5t127.5 -190.5t47.5 -232.5q0 -162 -80 -299.5t-218 -217.5t-300 -80t-299.5 80t-217.5 217.5t-80 299.5zM200 600l402 -294l-2 194h300l2 201h-300v197z" />
<glyph unicode="&#xe133;" d="M5 597q0 122 47.5 232.5t127.5 190.5t190.5 127.5t232.5 47.5q162 0 299.5 -80t217.5 -218t80 -300t-80 -299.5t-217.5 -217.5t-299.5 -80t-300 80t-218 217.5t-80 299.5zM300 600h200v-300h200v300h200l-300 400z" />
<glyph unicode="&#xe134;" d="M5 597q0 122 47.5 232.5t127.5 190.5t190.5 127.5t232.5 47.5q162 0 299.5 -80t217.5 -218t80 -300t-80 -299.5t-217.5 -217.5t-299.5 -80t-300 80t-218 217.5t-80 299.5zM300 600l300 -400l300 400h-200v300h-200v-300h-200z" />
<glyph unicode="&#xe135;" d="M5 597q0 122 47.5 232.5t127.5 190.5t190.5 127.5t232.5 47.5q121 0 231.5 -47.5t190.5 -127.5t127.5 -190.5t47.5 -232.5q0 -162 -80 -299.5t-217.5 -217.5t-299.5 -80t-300 80t-218 217.5t-80 299.5zM254 780q-8 -33 5.5 -92.5t7.5 -87.5q0 -9 17 -44t16 -60 q12 0 23 -5.5t23 -15t20 -13.5q24 -12 108 -42q22 -8 53 -31.5t59.5 -38.5t57.5 -11q8 -18 -15 -55t-20 -57q42 -71 87 -80q0 -6 -3 -15.5t-3.5 -14.5t4.5 -17q104 -3 221 112q30 29 47 47t34.5 49t20.5 62q-14 9 -37 9.5t-36 7.5q-14 7 -49 15t-52 19q-9 0 -39.5 -0.5 t-46.5 -1.5t-39 -6.5t-39 -16.5q-50 -35 -66 -12q-4 2 -3.5 25.5t0.5 25.5q-6 13 -26.5 17t-24.5 7q2 22 -2 41t-16.5 28t-38.5 -20q-23 -25 -42 4q-19 28 -8 58q6 16 22 22q6 -1 26 -1.5t33.5 -4t19.5 -13.5q12 -19 32 -37.5t34 -27.5l14 -8q0 3 9.5 39.5t5.5 57.5 q-4 23 14.5 44.5t22.5 31.5q5 14 10 35t8.5 31t15.5 22.5t34 21.5q-6 18 10 37q8 0 23.5 -1.5t24.5 -1.5t20.5 4.5t20.5 15.5q-10 23 -30.5 42.5t-38 30t-49 26.5t-43.5 23q11 39 2 44q31 -13 58 -14.5t39 3.5l11 4q7 36 -16.5 53.5t-64.5 28.5t-56 23q-19 -3 -37 0 q-15 -12 -36.5 -21t-34.5 -12t-44 -8t-39 -6q-15 -3 -45.5 0.5t-45.5 -2.5q-21 -7 -52 -26.5t-34 -34.5q-3 -11 6.5 -22.5t8.5 -18.5q-3 -34 -27.5 -90.5t-29.5 -79.5zM518 916q3 12 16 30t16 25q10 -10 18.5 -10t14 6t14.5 14.5t16 12.5q0 -24 17 -66.5t17 -43.5 q-9 2 -31 5t-36 5t-32 8t-30 14zM692 1003h1h-1z" />
<glyph unicode="&#xe136;" d="M0 164.5q0 21.5 15 37.5l600 599q-33 101 6 201.5t135 154.5q164 92 306 -9l-259 -138l145 -232l251 126q13 -175 -151 -267q-123 -70 -253 -23l-596 -596q-15 -16 -36.5 -16t-36.5 16l-111 110q-15 15 -15 36.5z" />
<glyph unicode="&#xe137;" horiz-adv-x="1220" d="M0 196v100q0 41 29.5 70.5t70.5 29.5h1000q41 0 70.5 -29.5t29.5 -70.5v-100q0 -41 -29.5 -70.5t-70.5 -29.5h-1000q-41 0 -70.5 29.5t-29.5 70.5zM0 596v100q0 41 29.5 70.5t70.5 29.5h1000q41 0 70.5 -29.5t29.5 -70.5v-100q0 -41 -29.5 -70.5t-70.5 -29.5h-1000 q-41 0 -70.5 29.5t-29.5 70.5zM0 996v100q0 41 29.5 70.5t70.5 29.5h1000q41 0 70.5 -29.5t29.5 -70.5v-100q0 -41 -29.5 -70.5t-70.5 -29.5h-1000q-41 0 -70.5 29.5t-29.5 70.5zM600 596h500v100h-500v-100zM800 196h300v100h-300v-100zM900 996h200v100h-200v-100z" />
<glyph unicode="&#xe138;" d="M100 1100v100h1000v-100h-1000zM150 1000h900l-350 -500v-300l-200 -200v500z" />
<glyph unicode="&#xe139;" d="M0 200v200h1200v-200q0 -41 -29.5 -70.5t-70.5 -29.5h-1000q-41 0 -70.5 29.5t-29.5 70.5zM0 500v400q0 41 29.5 70.5t70.5 29.5h300v100q0 41 29.5 70.5t70.5 29.5h200q41 0 70.5 -29.5t29.5 -70.5v-100h300q41 0 70.5 -29.5t29.5 -70.5v-400h-500v100h-200v-100h-500z M500 1000h200v100h-200v-100z" />
<glyph unicode="&#xe140;" d="M0 0v400l129 -129l200 200l142 -142l-200 -200l129 -129h-400zM0 800l129 129l200 -200l142 142l-200 200l129 129h-400v-400zM729 329l142 142l200 -200l129 129v-400h-400l129 129zM729 871l200 200l-129 129h400v-400l-129 129l-200 -200z" />
<glyph unicode="&#xe141;" d="M0 596q0 162 80 299t217 217t299 80t299 -80t217 -217t80 -299t-80 -299t-217 -217t-299 -80t-299 80t-217 217t-80 299zM182 596q0 -172 121.5 -293t292.5 -121t292.5 121t121.5 293q0 171 -121.5 292.5t-292.5 121.5t-292.5 -121.5t-121.5 -292.5zM291 655 q0 23 15.5 38.5t38.5 15.5t39 -16t16 -38q0 -23 -16 -39t-39 -16q-22 0 -38 16t-16 39zM400 850q0 22 16 38.5t39 16.5q22 0 38 -16t16 -39t-16 -39t-38 -16q-23 0 -39 16.5t-16 38.5zM514 609q0 32 20.5 56.5t51.5 29.5l122 126l1 1q-9 14 -9 28q0 22 16 38.5t39 16.5 q22 0 38 -16t16 -39t-16 -39t-38 -16q-14 0 -29 10l-55 -145q17 -22 17 -51q0 -36 -25.5 -61.5t-61.5 -25.5t-61.5 25.5t-25.5 61.5zM800 655q0 22 16 38t39 16t38.5 -15.5t15.5 -38.5t-16 -39t-38 -16q-23 0 -39 16t-16 39z" />
<glyph unicode="&#xe142;" d="M-40 375q-13 -95 35 -173q35 -57 94 -89t129 -32q63 0 119 28q33 16 65 40.5t52.5 45.5t59.5 64q40 44 57 61l394 394q35 35 47 84t-3 96q-27 87 -117 104q-20 2 -29 2q-46 0 -78.5 -16.5t-67.5 -51.5l-389 -396l-7 -7l69 -67l377 373q20 22 39 38q23 23 50 23 q38 0 53 -36q16 -39 -20 -75l-547 -547q-52 -52 -125 -52q-55 0 -100 33t-54 96q-5 35 2.5 66t31.5 63t42 50t56 54q24 21 44 41l348 348q52 52 82.5 79.5t84 54t107.5 26.5q25 0 48 -4q95 -17 154 -94.5t51 -175.5q-7 -101 -98 -192l-252 -249l-253 -256l7 -7l69 -60 l517 511q67 67 95 157t11 183q-16 87 -67 154t-130 103q-69 33 -152 33q-107 0 -197 -55q-40 -24 -111 -95l-512 -512q-68 -68 -81 -163z" />
<glyph unicode="&#xe143;" d="M80 784q0 131 98.5 229.5t230.5 98.5q143 0 241 -129q103 129 246 129q129 0 226 -98.5t97 -229.5q0 -46 -17.5 -91t-61 -99t-77 -89.5t-104.5 -105.5q-197 -191 -293 -322l-17 -23l-16 23q-43 58 -100 122.5t-92 99.5t-101 100q-71 70 -104.5 105.5t-77 89.5t-61 99 t-17.5 91zM250 784q0 -27 30.5 -70t61.5 -75.5t95 -94.5l22 -22q93 -90 190 -201q82 92 195 203l12 12q64 62 97.5 97t64.5 79t31 72q0 71 -48 119.5t-105 48.5q-74 0 -132 -83l-118 -171l-114 174q-51 80 -123 80q-60 0 -109.5 -49.5t-49.5 -118.5z" />
<glyph unicode="&#xe144;" d="M57 353q0 -95 66 -159l141 -142q68 -66 159 -66q93 0 159 66l283 283q66 66 66 159t-66 159l-141 141q-8 9 -19 17l-105 -105l212 -212l-389 -389l-247 248l95 95l-18 18q-46 45 -75 101l-55 -55q-66 -66 -66 -159zM269 706q0 -93 66 -159l141 -141q7 -7 19 -17l105 105 l-212 212l389 389l247 -247l-95 -96l18 -17q47 -49 77 -100l29 29q35 35 62.5 88t27.5 96q0 93 -66 159l-141 141q-66 66 -159 66q-95 0 -159 -66l-283 -283q-66 -64 -66 -159z" />
<glyph unicode="&#xe145;" d="M200 100v953q0 21 30 46t81 48t129 38t163 15t162 -15t127 -38t79 -48t29 -46v-953q0 -41 -29.5 -70.5t-70.5 -29.5h-600q-41 0 -70.5 29.5t-29.5 70.5zM300 300h600v700h-600v-700zM496 150q0 -43 30.5 -73.5t73.5 -30.5t73.5 30.5t30.5 73.5t-30.5 73.5t-73.5 30.5 t-73.5 -30.5t-30.5 -73.5z" />
<glyph unicode="&#xe146;" d="M0 0l303 380l207 208l-210 212h300l267 279l-35 36q-15 14 -15 35t15 35q14 15 35 15t35 -15l283 -282q15 -15 15 -36t-15 -35q-14 -15 -35 -15t-35 15l-36 35l-279 -267v-300l-212 210l-208 -207z" />
<glyph unicode="&#xe148;" d="M295 433h139q5 -77 48.5 -126.5t117.5 -64.5v335q-6 1 -15.5 4t-11.5 3q-46 14 -79 26.5t-72 36t-62.5 52t-40 72.5t-16.5 99q0 92 44 159.5t109 101t144 40.5v78h100v-79q38 -4 72.5 -13.5t75.5 -31.5t71 -53.5t51.5 -84t24.5 -118.5h-159q-8 72 -35 109.5t-101 50.5 v-307l64 -14q34 -7 64 -16.5t70 -31.5t67.5 -52t47.5 -80.5t20 -112.5q0 -139 -89 -224t-244 -96v-77h-100v78q-152 17 -237 104q-40 40 -52.5 93.5t-15.5 139.5zM466 889q0 -29 8 -51t16.5 -34t29.5 -22.5t31 -13.5t38 -10q7 -2 11 -3v274q-61 -8 -97.5 -37.5t-36.5 -102.5 zM700 237q170 18 170 151q0 64 -44 99.5t-126 60.5v-311z" />
<glyph unicode="&#xe149;" d="M100 600v100h166q-24 49 -44 104q-10 26 -14.5 55.5t-3 72.5t25 90t68.5 87q97 88 263 88q129 0 230 -89t101 -208h-153q0 52 -34 89.5t-74 51.5t-76 14q-37 0 -79 -14.5t-62 -35.5q-41 -44 -41 -101q0 -28 16.5 -69.5t28 -62.5t41.5 -72h241v-100h-197q8 -50 -2.5 -115 t-31.5 -94q-41 -59 -99 -113q35 11 84 18t70 7q33 1 103 -16t103 -17q76 0 136 30l50 -147q-41 -25 -80.5 -36.5t-59 -13t-61.5 -1.5q-23 0 -128 33t-155 29q-39 -4 -82 -17t-66 -25l-24 -11l-55 145l16.5 11t15.5 10t13.5 9.5t14.5 12t14.5 14t17.5 18.5q48 55 54 126.5 t-30 142.5h-221z" />
<glyph unicode="&#xe150;" d="M2 300l298 -300l298 300h-198v900h-200v-900h-198zM602 900l298 300l298 -300h-198v-900h-200v900h-198z" />
<glyph unicode="&#xe151;" d="M2 300h198v900h200v-900h198l-298 -300zM700 0v200h100v-100h200v-100h-300zM700 400v100h300v-200h-99v-100h-100v100h99v100h-200zM700 700v500h300v-500h-100v100h-100v-100h-100zM801 900h100v200h-100v-200z" />
<glyph unicode="&#xe152;" d="M2 300h198v900h200v-900h198l-298 -300zM700 0v500h300v-500h-100v100h-100v-100h-100zM700 700v200h100v-100h200v-100h-300zM700 1100v100h300v-200h-99v-100h-100v100h99v100h-200zM801 200h100v200h-100v-200z" />
<glyph unicode="&#xe153;" d="M2 300l298 -300l298 300h-198v900h-200v-900h-198zM800 100v400h300v-500h-100v100h-200zM800 1100v100h200v-500h-100v400h-100zM901 200h100v200h-100v-200z" />
<glyph unicode="&#xe154;" d="M2 300l298 -300l298 300h-198v900h-200v-900h-198zM800 400v100h200v-500h-100v400h-100zM800 800v400h300v-500h-100v100h-200zM901 900h100v200h-100v-200z" />
<glyph unicode="&#xe155;" d="M2 300l298 -300l298 300h-198v900h-200v-900h-198zM700 100v200h500v-200h-500zM700 400v200h400v-200h-400zM700 700v200h300v-200h-300zM700 1000v200h200v-200h-200z" />
<glyph unicode="&#xe156;" d="M2 300l298 -300l298 300h-198v900h-200v-900h-198zM700 100v200h200v-200h-200zM700 400v200h300v-200h-300zM700 700v200h400v-200h-400zM700 1000v200h500v-200h-500z" />
<glyph unicode="&#xe157;" d="M0 400v300q0 165 117.5 282.5t282.5 117.5h300q162 0 281 -118.5t119 -281.5v-300q0 -165 -118.5 -282.5t-281.5 -117.5h-300q-165 0 -282.5 117.5t-117.5 282.5zM200 300q0 -41 29.5 -70.5t70.5 -29.5h500q41 0 70.5 29.5t29.5 70.5v500q0 41 -29.5 70.5t-70.5 29.5 h-500q-41 0 -70.5 -29.5t-29.5 -70.5v-500z" />
<glyph unicode="&#xe158;" d="M0 400v300q0 163 119 281.5t281 118.5h300q165 0 282.5 -117.5t117.5 -282.5v-300q0 -165 -117.5 -282.5t-282.5 -117.5h-300q-163 0 -281.5 117.5t-118.5 282.5zM200 300q0 -41 29.5 -70.5t70.5 -29.5h500q41 0 70.5 29.5t29.5 70.5v500q0 41 -29.5 70.5t-70.5 29.5 h-500q-41 0 -70.5 -29.5t-29.5 -70.5v-500zM400 300l333 250l-333 250v-500z" />
<glyph unicode="&#xe159;" d="M0 400v300q0 163 117.5 281.5t282.5 118.5h300q163 0 281.5 -119t118.5 -281v-300q0 -165 -117.5 -282.5t-282.5 -117.5h-300q-165 0 -282.5 117.5t-117.5 282.5zM200 300q0 -41 29.5 -70.5t70.5 -29.5h500q41 0 70.5 29.5t29.5 70.5v500q0 41 -29.5 70.5t-70.5 29.5 h-500q-41 0 -70.5 -29.5t-29.5 -70.5v-500zM300 700l250 -333l250 333h-500z" />
<glyph unicode="&#xe160;" d="M0 400v300q0 165 117.5 282.5t282.5 117.5h300q165 0 282.5 -117.5t117.5 -282.5v-300q0 -162 -118.5 -281t-281.5 -119h-300q-165 0 -282.5 118.5t-117.5 281.5zM200 300q0 -41 29.5 -70.5t70.5 -29.5h500q41 0 70.5 29.5t29.5 70.5v500q0 41 -29.5 70.5t-70.5 29.5 h-500q-41 0 -70.5 -29.5t-29.5 -70.5v-500zM300 400h500l-250 333z" />
<glyph unicode="&#xe161;" d="M0 400v300h300v200l400 -350l-400 -350v200h-300zM500 0v200h500q41 0 70.5 29.5t29.5 70.5v500q0 41 -29.5 70.5t-70.5 29.5h-500v200h400q165 0 282.5 -117.5t117.5 -282.5v-300q0 -165 -117.5 -282.5t-282.5 -117.5h-400z" />
<glyph unicode="&#xe162;" d="M217 519q8 -19 31 -19h302q-155 -438 -160 -458q-5 -21 4 -32l9 -8h9q14 0 26 15q11 13 274.5 321.5t264.5 308.5q14 19 5 36q-8 17 -31 17l-301 -1q1 4 78 219.5t79 227.5q2 15 -5 27l-9 9h-9q-15 0 -25 -16q-4 -6 -98 -111.5t-228.5 -257t-209.5 -237.5q-16 -19 -6 -41 z" />
<glyph unicode="&#xe163;" d="M0 400q0 -165 117.5 -282.5t282.5 -117.5h300q47 0 100 15v185h-500q-41 0 -70.5 29.5t-29.5 70.5v500q0 41 29.5 70.5t70.5 29.5h500v185q-14 4 -114 7.5t-193 5.5l-93 2q-165 0 -282.5 -117.5t-117.5 -282.5v-300zM600 400v300h300v200l400 -350l-400 -350v200h-300z " />
<glyph unicode="&#xe164;" d="M0 400q0 -165 117.5 -282.5t282.5 -117.5h300q163 0 281.5 117.5t118.5 282.5v98l-78 73l-122 -123v-148q0 -41 -29.5 -70.5t-70.5 -29.5h-500q-41 0 -70.5 29.5t-29.5 70.5v500q0 41 29.5 70.5t70.5 29.5h156l118 122l-74 78h-100q-165 0 -282.5 -117.5t-117.5 -282.5 v-300zM496 709l353 342l-149 149h500v-500l-149 149l-342 -353z" />
<glyph unicode="&#xe165;" d="M4 600q0 162 80 299t217 217t299 80t299 -80t217 -217t80 -299t-80 -299t-217 -217t-299 -80t-299 80t-217 217t-80 299zM186 600q0 -171 121.5 -292.5t292.5 -121.5t292.5 121.5t121.5 292.5t-121.5 292.5t-292.5 121.5t-292.5 -121.5t-121.5 -292.5zM406 600 q0 80 57 137t137 57t137 -57t57 -137t-57 -137t-137 -57t-137 57t-57 137z" />
<glyph unicode="&#xe166;" d="M0 0v275q0 11 7 18t18 7h1048q11 0 19 -7.5t8 -17.5v-275h-1100zM100 800l445 -500l450 500h-295v400h-300v-400h-300zM900 150h100v50h-100v-50z" />
<glyph unicode="&#xe167;" d="M0 0v275q0 11 7 18t18 7h1048q11 0 19 -7.5t8 -17.5v-275h-1100zM100 700h300v-300h300v300h295l-445 500zM900 150h100v50h-100v-50z" />
<glyph unicode="&#xe168;" d="M0 0v275q0 11 7 18t18 7h1048q11 0 19 -7.5t8 -17.5v-275h-1100zM100 705l305 -305l596 596l-154 155l-442 -442l-150 151zM900 150h100v50h-100v-50z" />
<glyph unicode="&#xe169;" d="M0 0v275q0 11 7 18t18 7h1048q11 0 19 -7.5t8 -17.5v-275h-1100zM100 988l97 -98l212 213l-97 97zM200 400l697 1l3 699l-250 -239l-149 149l-212 -212l149 -149zM900 150h100v50h-100v-50z" />
<glyph unicode="&#xe170;" d="M0 0v275q0 11 7 18t18 7h1048q11 0 19 -7.5t8 -17.5v-275h-1100zM200 612l212 -212l98 97l-213 212zM300 1200l239 -250l-149 -149l212 -212l149 148l249 -237l-1 697zM900 150h100v50h-100v-50z" />
<glyph unicode="&#xe171;" d="M23 415l1177 784v-1079l-475 272l-310 -393v416h-392zM494 210l672 938l-672 -712v-226z" />
<glyph unicode="&#xe172;" d="M0 150v1000q0 20 14.5 35t35.5 15h250v-300h500v300h100l200 -200v-850q0 -21 -15 -35.5t-35 -14.5h-150v400h-700v-400h-150q-21 0 -35.5 14.5t-14.5 35.5zM600 1000h100v200h-100v-200z" />
<glyph unicode="&#xe173;" d="M0 150v1000q0 20 14.5 35t35.5 15h250v-300h500v300h100l200 -200v-218l-276 -275l-120 120l-126 -127h-378v-400h-150q-21 0 -35.5 14.5t-14.5 35.5zM581 306l123 123l120 -120l353 352l123 -123l-475 -476zM600 1000h100v200h-100v-200z" />
<glyph unicode="&#xe174;" d="M0 150v1000q0 20 14.5 35t35.5 15h250v-300h500v300h100l200 -200v-269l-103 -103l-170 170l-298 -298h-329v-400h-150q-21 0 -35.5 14.5t-14.5 35.5zM600 1000h100v200h-100v-200zM700 133l170 170l-170 170l127 127l170 -170l170 170l127 -128l-170 -169l170 -170 l-127 -127l-170 170l-170 -170z" />
<glyph unicode="&#xe175;" d="M0 150v1000q0 20 14.5 35t35.5 15h250v-300h500v300h100l200 -200v-300h-400v-200h-500v-400h-150q-21 0 -35.5 14.5t-14.5 35.5zM600 300l300 -300l300 300h-200v300h-200v-300h-200zM600 1000v200h100v-200h-100z" />
<glyph unicode="&#xe176;" d="M0 150v1000q0 20 14.5 35t35.5 15h250v-300h500v300h100l200 -200v-402l-200 200l-298 -298h-402v-400h-150q-21 0 -35.5 14.5t-14.5 35.5zM600 300h200v-300h200v300h200l-300 300zM600 1000v200h100v-200h-100z" />
<glyph unicode="&#xe177;" d="M0 250q0 -21 14.5 -35.5t35.5 -14.5h1100q21 0 35.5 14.5t14.5 35.5v550h-1200v-550zM0 900h1200v150q0 21 -14.5 35.5t-35.5 14.5h-1100q-21 0 -35.5 -14.5t-14.5 -35.5v-150zM100 300v200h400v-200h-400z" />
<glyph unicode="&#xe178;" d="M0 400l300 298v-198h400v-200h-400v-198zM100 800v200h100v-200h-100zM300 800v200h100v-200h-100zM500 800v200h400v198l300 -298l-300 -298v198h-400zM800 300v200h100v-200h-100zM1000 300h100v200h-100v-200z" />
<glyph unicode="&#xe179;" d="M100 700v400l50 100l50 -100v-300h100v300l50 100l50 -100v-300h100v300l50 100l50 -100v-400l-100 -203v-447q0 -21 -14.5 -35.5t-35.5 -14.5h-200q-21 0 -35.5 14.5t-14.5 35.5v447zM800 597q0 -29 10.5 -55.5t25 -43t29 -28.5t25.5 -18l10 -5v-397q0 -21 14.5 -35.5 t35.5 -14.5h200q21 0 35.5 14.5t14.5 35.5v1106q0 31 -18 40.5t-44 -7.5l-276 -116q-25 -17 -43.5 -51.5t-18.5 -65.5v-359z" />
<glyph unicode="&#xe180;" d="M100 0h400v56q-75 0 -87.5 6t-12.5 44v394h500v-394q0 -38 -12.5 -44t-87.5 -6v-56h400v56q-4 0 -11 0.5t-24 3t-30 7t-24 15t-11 24.5v888q0 22 25 34.5t50 13.5l25 2v56h-400v-56q75 0 87.5 -6t12.5 -44v-394h-500v394q0 38 12.5 44t87.5 6v56h-400v-56q4 0 11 -0.5 t24 -3t30 -7t24 -15t11 -24.5v-888q0 -22 -25 -34.5t-50 -13.5l-25 -2v-56z" />
<glyph unicode="&#xe181;" d="M0 300q0 -41 29.5 -70.5t70.5 -29.5h300q41 0 70.5 29.5t29.5 70.5v500q0 41 -29.5 70.5t-70.5 29.5h-300q-41 0 -70.5 -29.5t-29.5 -70.5v-500zM100 100h400l200 200h105l295 98v-298h-425l-100 -100h-375zM100 300v200h300v-200h-300zM100 600v200h300v-200h-300z M100 1000h400l200 -200v-98l295 98h105v200h-425l-100 100h-375zM700 402v163l400 133v-163z" />
<glyph unicode="&#xe182;" d="M16.5 974.5q0.5 -21.5 16 -90t46.5 -140t104 -177.5t175 -208q103 -103 207.5 -176t180 -103.5t137 -47t92.5 -16.5l31 1l163 162q17 18 13.5 41t-22.5 37l-192 136q-19 14 -45 12t-42 -19l-118 -118q-142 101 -268 227t-227 268l118 118q17 17 20 41.5t-11 44.5 l-139 194q-14 19 -36.5 22t-40.5 -14l-162 -162q-1 -11 -0.5 -32.5z" />
<glyph unicode="&#xe183;" d="M0 50v212q0 20 10.5 45.5t24.5 39.5l365 303v50q0 4 1 10.5t12 22.5t30 28.5t60 23t97 10.5t97 -10t60 -23.5t30 -27.5t12 -24l1 -10v-50l365 -303q14 -14 24.5 -39.5t10.5 -45.5v-212q0 -21 -14.5 -35.5t-35.5 -14.5h-1100q-20 0 -35 14.5t-15 35.5zM0 712 q0 -21 14.5 -33.5t34.5 -8.5l202 33q20 4 34.5 21t14.5 38v146q141 24 300 24t300 -24v-146q0 -21 14.5 -38t34.5 -21l202 -33q20 -4 34.5 8.5t14.5 33.5v200q-6 8 -19 20.5t-63 45t-112 57t-171 45t-235 20.5q-92 0 -175 -10.5t-141.5 -27t-108.5 -36.5t-81.5 -40 t-53.5 -36.5t-31 -27.5l-9 -10v-200z" />
<glyph unicode="&#xe184;" d="M100 0v100h1100v-100h-1100zM175 200h950l-125 150v250l100 100v400h-100v-200h-100v200h-200v-200h-100v200h-200v-200h-100v200h-100v-400l100 -100v-250z" />
<glyph unicode="&#xe185;" d="M100 0h300v400q0 41 -29.5 70.5t-70.5 29.5h-100q-41 0 -70.5 -29.5t-29.5 -70.5v-400zM500 0v1000q0 41 29.5 70.5t70.5 29.5h100q41 0 70.5 -29.5t29.5 -70.5v-1000h-300zM900 0v700q0 41 29.5 70.5t70.5 29.5h100q41 0 70.5 -29.5t29.5 -70.5v-700h-300z" />
<glyph unicode="&#xe186;" d="M-100 300v500q0 124 88 212t212 88h700q124 0 212 -88t88 -212v-500q0 -124 -88 -212t-212 -88h-700q-124 0 -212 88t-88 212zM100 200h900v700h-900v-700zM200 300h300v300h-200v100h200v100h-300v-300h200v-100h-200v-100zM600 300h200v100h100v300h-100v100h-200v-500 zM700 400v300h100v-300h-100z" />
<glyph unicode="&#xe187;" d="M-100 300v500q0 124 88 212t212 88h700q124 0 212 -88t88 -212v-500q0 -124 -88 -212t-212 -88h-700q-124 0 -212 88t-88 212zM100 200h900v700h-900v-700zM200 300h100v200h100v-200h100v500h-100v-200h-100v200h-100v-500zM600 300h200v100h100v300h-100v100h-200v-500 zM700 400v300h100v-300h-100z" />
<glyph unicode="&#xe188;" d="M-100 300v500q0 124 88 212t212 88h700q124 0 212 -88t88 -212v-500q0 -124 -88 -212t-212 -88h-700q-124 0 -212 88t-88 212zM100 200h900v700h-900v-700zM200 300h300v100h-200v300h200v100h-300v-500zM600 300h300v100h-200v300h200v100h-300v-500z" />
<glyph unicode="&#xe189;" d="M-100 300v500q0 124 88 212t212 88h700q124 0 212 -88t88 -212v-500q0 -124 -88 -212t-212 -88h-700q-124 0 -212 88t-88 212zM100 200h900v700h-900v-700zM200 550l300 -150v300zM600 400l300 150l-300 150v-300z" />
<glyph unicode="&#xe190;" d="M-100 300v500q0 124 88 212t212 88h700q124 0 212 -88t88 -212v-500q0 -124 -88 -212t-212 -88h-700q-124 0 -212 88t-88 212zM100 200h900v700h-900v-700zM200 300v500h700v-500h-700zM300 400h130q41 0 68 42t27 107t-28.5 108t-66.5 43h-130v-300zM575 549 q0 -65 27 -107t68 -42h130v300h-130q-38 0 -66.5 -43t-28.5 -108z" />
<glyph unicode="&#xe191;" d="M-100 300v500q0 124 88 212t212 88h700q124 0 212 -88t88 -212v-500q0 -124 -88 -212t-212 -88h-700q-124 0 -212 88t-88 212zM100 200h900v700h-900v-700zM200 300h300v300h-200v100h200v100h-300v-300h200v-100h-200v-100zM601 300h100v100h-100v-100zM700 700h100 v-400h100v500h-200v-100z" />
<glyph unicode="&#xe192;" d="M-100 300v500q0 124 88 212t212 88h700q124 0 212 -88t88 -212v-500q0 -124 -88 -212t-212 -88h-700q-124 0 -212 88t-88 212zM100 200h900v700h-900v-700zM200 300h300v400h-200v100h-100v-500zM301 400v200h100v-200h-100zM601 300h100v100h-100v-100zM700 700h100 v-400h100v500h-200v-100z" />
<glyph unicode="&#xe193;" d="M-100 300v500q0 124 88 212t212 88h700q124 0 212 -88t88 -212v-500q0 -124 -88 -212t-212 -88h-700q-124 0 -212 88t-88 212zM100 200h900v700h-900v-700zM200 700v100h300v-300h-99v-100h-100v100h99v200h-200zM201 300v100h100v-100h-100zM601 300v100h100v-100h-100z M700 700v100h200v-500h-100v400h-100z" />
<glyph unicode="&#xe194;" d="M4 600q0 162 80 299t217 217t299 80t299 -80t217 -217t80 -299t-80 -299t-217 -217t-299 -80t-299 80t-217 217t-80 299zM186 600q0 -171 121.5 -292.5t292.5 -121.5t292.5 121.5t121.5 292.5t-121.5 292.5t-292.5 121.5t-292.5 -121.5t-121.5 -292.5zM400 500v200 l100 100h300v-100h-300v-200h300v-100h-300z" />
<glyph unicode="&#xe195;" d="M0 600q0 162 80 299t217 217t299 80t299 -80t217 -217t80 -299t-80 -299t-217 -217t-299 -80t-299 80t-217 217t-80 299zM182 600q0 -171 121.5 -292.5t292.5 -121.5t292.5 121.5t121.5 292.5t-121.5 292.5t-292.5 121.5t-292.5 -121.5t-121.5 -292.5zM400 400v400h300 l100 -100v-100h-100v100h-200v-100h200v-100h-200v-100h-100zM700 400v100h100v-100h-100z" />
<glyph unicode="&#xe197;" d="M-14 494q0 -80 56.5 -137t135.5 -57h222v300h400v-300h128q120 0 205 86.5t85 207.5t-85 207t-205 86q-46 0 -90 -14q-44 97 -134.5 156.5t-200.5 59.5q-152 0 -260 -107.5t-108 -260.5q0 -25 2 -37q-66 -14 -108.5 -67.5t-42.5 -122.5zM300 200h200v300h200v-300h200 l-300 -300z" />
<glyph unicode="&#xe198;" d="M-14 494q0 -80 56.5 -137t135.5 -57h8l414 414l403 -403q94 26 154.5 104.5t60.5 178.5q0 120 -85 206.5t-205 86.5q-46 0 -90 -14q-44 97 -134.5 156.5t-200.5 59.5q-152 0 -260 -107.5t-108 -260.5q0 -25 2 -37q-66 -14 -108.5 -67.5t-42.5 -122.5zM300 200l300 300 l300 -300h-200v-300h-200v300h-200z" />
<glyph unicode="&#xe199;" d="M100 200h400v-155l-75 -45h350l-75 45v155h400l-270 300h170l-270 300h170l-300 333l-300 -333h170l-270 -300h170z" />
<glyph unicode="&#xe200;" d="M121 700q0 -53 28.5 -97t75.5 -65q-4 -16 -4 -38q0 -74 52.5 -126.5t126.5 -52.5q56 0 100 30v-306l-75 -45h350l-75 45v306q46 -30 100 -30q74 0 126.5 52.5t52.5 126.5q0 24 -9 55q50 32 79.5 83t29.5 112q0 90 -61.5 155.5t-150.5 71.5q-26 89 -99.5 145.5 t-167.5 56.5q-116 0 -197.5 -81.5t-81.5 -197.5q0 -4 1 -11.5t1 -11.5q-14 2 -23 2q-74 0 -126.5 -52.5t-52.5 -126.5z" />
</font>
</defs></svg>

Before

Width:  |  Height:  |  Size: 62 KiB

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -1,7 +0,0 @@
/**
* @preserve HTML5 Shiv 3.7.2 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
*/
// Only run this code in IE 8
if (!!window.navigator.userAgent.match("MSIE 8")) {
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.2",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b)}(this,document);
};

View File

@ -1,7 +0,0 @@
/*! Respond.js v1.4.2: min/max-width media query polyfill * Copyright 2013 Scott Jehl
* Licensed under https://github.com/scottjehl/Respond/blob/master/LICENSE-MIT
* */
if (!!window.navigator.userAgent.match("MSIE 8")) {
!function(a){"use strict";a.matchMedia=a.matchMedia||function(a){var b,c=a.documentElement,d=c.firstElementChild||c.firstChild,e=a.createElement("body"),f=a.createElement("div");return f.id="mq-test-1",f.style.cssText="position:absolute;top:-100em",e.style.background="none",e.appendChild(f),function(a){return f.innerHTML='&shy;<style media="'+a+'"> #mq-test-1 { width: 42px; }</style>',c.insertBefore(e,d),b=42===f.offsetWidth,c.removeChild(e),{matches:b,media:a}}}(a.document)}(this),function(a){"use strict";function b(){u(!0)}var c={};a.respond=c,c.update=function(){};var d=[],e=function(){var b=!1;try{b=new a.XMLHttpRequest}catch(c){b=new a.ActiveXObject("Microsoft.XMLHTTP")}return function(){return b}}(),f=function(a,b){var c=e();c&&(c.open("GET",a,!0),c.onreadystatechange=function(){4!==c.readyState||200!==c.status&&304!==c.status||b(c.responseText)},4!==c.readyState&&c.send(null))};if(c.ajax=f,c.queue=d,c.regex={media:/@media[^\{]+\{([^\{\}]*\{[^\}\{]*\})+/gi,keyframes:/@(?:\-(?:o|moz|webkit)\-)?keyframes[^\{]+\{(?:[^\{\}]*\{[^\}\{]*\})+[^\}]*\}/gi,urls:/(url\()['"]?([^\/\)'"][^:\)'"]+)['"]?(\))/g,findStyles:/@media *([^\{]+)\{([\S\s]+?)$/,only:/(only\s+)?([a-zA-Z]+)\s?/,minw:/\([\s]*min\-width\s*:[\s]*([\s]*[0-9\.]+)(px|em)[\s]*\)/,maxw:/\([\s]*max\-width\s*:[\s]*([\s]*[0-9\.]+)(px|em)[\s]*\)/},c.mediaQueriesSupported=a.matchMedia&&null!==a.matchMedia("only all")&&a.matchMedia("only all").matches,!c.mediaQueriesSupported){var g,h,i,j=a.document,k=j.documentElement,l=[],m=[],n=[],o={},p=30,q=j.getElementsByTagName("head")[0]||k,r=j.getElementsByTagName("base")[0],s=q.getElementsByTagName("link"),t=function(){var a,b=j.createElement("div"),c=j.body,d=k.style.fontSize,e=c&&c.style.fontSize,f=!1;return b.style.cssText="position:absolute;font-size:1em;width:1em",c||(c=f=j.createElement("body"),c.style.background="none"),k.style.fontSize="100%",c.style.fontSize="100%",c.appendChild(b),f&&k.insertBefore(c,k.firstChild),a=b.offsetWidth,f?k.removeChild(c):c.removeChild(b),k.style.fontSize=d,e&&(c.style.fontSize=e),a=i=parseFloat(a)},u=function(b){var c="clientWidth",d=k[c],e="CSS1Compat"===j.compatMode&&d||j.body[c]||d,f={},o=s[s.length-1],r=(new Date).getTime();if(b&&g&&p>r-g)return a.clearTimeout(h),h=a.setTimeout(u,p),void 0;g=r;for(var v in l)if(l.hasOwnProperty(v)){var w=l[v],x=w.minw,y=w.maxw,z=null===x,A=null===y,B="em";x&&(x=parseFloat(x)*(x.indexOf(B)>-1?i||t():1)),y&&(y=parseFloat(y)*(y.indexOf(B)>-1?i||t():1)),w.hasquery&&(z&&A||!(z||e>=x)||!(A||y>=e))||(f[w.media]||(f[w.media]=[]),f[w.media].push(m[w.rules]))}for(var C in n)n.hasOwnProperty(C)&&n[C]&&n[C].parentNode===q&&q.removeChild(n[C]);n.length=0;for(var D in f)if(f.hasOwnProperty(D)){var E=j.createElement("style"),F=f[D].join("\n");E.type="text/css",E.media=D,q.insertBefore(E,o.nextSibling),E.styleSheet?E.styleSheet.cssText=F:E.appendChild(j.createTextNode(F)),n.push(E)}},v=function(a,b,d){var e=a.replace(c.regex.keyframes,"").match(c.regex.media),f=e&&e.length||0;b=b.substring(0,b.lastIndexOf("/"));var g=function(a){return a.replace(c.regex.urls,"$1"+b+"$2$3")},h=!f&&d;b.length&&(b+="/"),h&&(f=1);for(var i=0;f>i;i++){var j,k,n,o;h?(j=d,m.push(g(a))):(j=e[i].match(c.regex.findStyles)&&RegExp.$1,m.push(RegExp.$2&&g(RegExp.$2))),n=j.split(","),o=n.length;for(var p=0;o>p;p++)k=n[p],l.push({media:k.split("(")[0].match(c.regex.only)&&RegExp.$2||"all",rules:m.length-1,hasquery:k.indexOf("(")>-1,minw:k.match(c.regex.minw)&&parseFloat(RegExp.$1)+(RegExp.$2||""),maxw:k.match(c.regex.maxw)&&parseFloat(RegExp.$1)+(RegExp.$2||"")})}u()},w=function(){if(d.length){var b=d.shift();f(b.href,function(c){v(c,b.href,b.media),o[b.href]=!0,a.setTimeout(function(){w()},0)})}},x=function(){for(var b=0;b<s.length;b++){var c=s[b],e=c.href,f=c.media,g=c.rel&&"stylesheet"===c.rel.toLowerCase();e&&g&&!o[e]&&(c.styleSheet&&c.styleSheet.rawCssText?(v(c.styleSheet.rawCssText,e,f),o[e]=!0):(!/^([a-zA-Z:]*\/\/)/.test(e)&&!r||e.replace(RegExp.$1,"").split("/")[0]===a.location.host)&&("//"===e.substring(0,2)&&(e=a.location.protocol+e),d.push({href:e,media:f})))}w()};x(),c.update=x,c.getEmValue=t,a.addEventListener?a.addEventListener("resize",b,!1):a.attachEvent&&a.attachEvent("onresize",b)}}(this);
};

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

Binary file not shown.

Before

Width:  |  Height:  |  Size: 8.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

View File

@ -1,131 +0,0 @@
/* Affix navigation to top-left */
#nav.affix {
position: static;
}
@media screen and (min-width: 768px) {
#nav.affix {
position: fixed;
top: 10px;
}
#nav.affix-bottom {
position: absolute;
}
}
/* Ensure navigation has consistent width when scrolling
http://getbootstrap.com/css/#grid-options shows the column width/screen width mapping */
@media (min-width: 768px) {
#nav {
width: 186px; /* 62*3 */
}
}
@media (min-width: 992px) {
#nav {
width: 243px; /* 81*3 */
}
}
@media (min-width: 1200px) {
#nav {
width: 291px; /* 97*3 */
}
}
/* Tweak code styling */
code {
padding: 1px;
}
/* Tweak navigation list styling */
ul.toc {
padding-left: 0px;
}
ul.toc .dropdown-header {
padding: 5px 0 0 0;
}
ul .dropdown-header:first-child {
margin-top: 2px;
}
.dropdown-header {
color: #333;
margin-top: 8px;
}
.dropdown-header a:link, .dropdown-header a:visited {
font-weight: bold;
padding-left: 0;
}
.dropdown-menu .dropdown-header {
border-bottom: 1px solid #eee;
}
/* Syntax highlighting */
pre, code {
font-family: 'Inconsolata', sans-serif;
font-size: 1em;
background-color: #fafafa;
}
pre {
border-color: #ddd;
}
code {
color: #333;
white-space: normal;
}
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
margin: 0; padding: 0; vertical-align: baseline; border: none; }
table.sourceCode { width: 100%; line-height: 100%; }
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
td.sourceCode { padding-left: 5px; }
/* Class described in https://benjeffrey.com/posts/pandoc-syntax-highlighting-css
Colours from https://gist.github.com/robsimmons/1172277 */
code > span.kw { color: #555; font-weight: bold; } /* Keyword */
code > span.dt { color: #902000; } /* DataType */
code > span.dv { color: #40a070; } /* DecVal (decimal values) */
code > span.bn { color: #d14; } /* BaseN */
code > span.fl { color: #d14; } /* Float */
code > span.ch { color: #d14; } /* Char */
code > span.st { color: #d14; } /* String */
code > span.co { color: #888888; font-style: italic; } /* Comment */
code > span.ot { color: #007020; } /* OtherToken */
code > span.al { color: #ff0000; font-weight: bold; } /* AlertToken */
code > span.fu { color: #900; font-weight: bold; } /* Function calls */
code > span.er { color: #a61717; background-color: #e3d2d2; } /* ErrorTok */
/* Tables */
table {
width: 100%;
margin-bottom: 20px;
}
table thead > tr > th,
table tbody > tr > th,
table tfoot > tr > th,
table thead > tr > td,
table tbody > tr > td,
table tfoot > tr > td {
padding: 8px;
line-height: 1.428571429;
vertical-align: top;
border-top: 1px solid #dddddd;
}
table thead > tr > th {
vertical-align: bottom;
border-bottom: 2px solid #dddddd;
}
table tr.odd {
background-color: #fafafa;
}

View File

@ -1,625 +0,0 @@
(function() {
// If window.HTMLWidgets is already defined, then use it; otherwise create a
// new object. This allows preceding code to set options that affect the
// initialization process (though none currently exist).
window.HTMLWidgets = window.HTMLWidgets || {};
// See if we're running in a viewer pane. If not, we're in a web browser.
var viewerMode = window.HTMLWidgets.viewerMode =
/\bviewer_pane=1\b/.test(window.location);
// See if we're running in Shiny mode. If not, it's a static document.
// Note that static widgets can appear in both Shiny and static modes, but
// obviously, Shiny widgets can only appear in Shiny apps/documents.
var shinyMode = window.HTMLWidgets.shinyMode =
typeof(window.Shiny) !== "undefined" && !!window.Shiny.outputBindings;
// We can't count on jQuery being available, so we implement our own
// version if necessary.
function querySelectorAll(scope, selector) {
if (typeof(jQuery) !== "undefined" && scope instanceof jQuery) {
return scope.find(selector);
}
if (scope.querySelectorAll) {
return scope.querySelectorAll(selector);
}
}
function asArray(value) {
if (value === null)
return [];
if ($.isArray(value))
return value;
return [value];
}
// Implement jQuery's extend
function extend(target /*, ... */) {
if (arguments.length == 1) {
return target;
}
for (var i = 1; i < arguments.length; i++) {
var source = arguments[i];
for (var prop in source) {
if (source.hasOwnProperty(prop)) {
target[prop] = source[prop];
}
}
}
return target;
}
// IE8 doesn't support Array.forEach.
function forEach(values, callback, thisArg) {
if (values.forEach) {
values.forEach(callback, thisArg);
} else {
for (var i = 0; i < values.length; i++) {
callback.call(thisArg, values[i], i, values);
}
}
}
// Replaces the specified method with the return value of funcSource.
//
// Note that funcSource should not BE the new method, it should be a function
// that RETURNS the new method. funcSource receives a single argument that is
// the overridden method, it can be called from the new method. The overridden
// method can be called like a regular function, it has the target permanently
// bound to it so "this" will work correctly.
function overrideMethod(target, methodName, funcSource) {
var superFunc = target[methodName] || function() {};
var superFuncBound = function() {
return superFunc.apply(target, arguments);
};
target[methodName] = funcSource(superFuncBound);
}
// Implement a vague facsimilie of jQuery's data method
function elementData(el, name, value) {
if (arguments.length == 2) {
return el["htmlwidget_data_" + name];
} else if (arguments.length == 3) {
el["htmlwidget_data_" + name] = value;
return el;
} else {
throw new Error("Wrong number of arguments for elementData: " +
arguments.length);
}
}
// http://stackoverflow.com/questions/3446170/escape-string-for-use-in-javascript-regex
function escapeRegExp(str) {
return str.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
}
function hasClass(el, className) {
var re = new RegExp("\\b" + escapeRegExp(className) + "\\b");
return re.test(el.className);
}
// elements - array (or array-like object) of HTML elements
// className - class name to test for
// include - if true, only return elements with given className;
// if false, only return elements *without* given className
function filterByClass(elements, className, include) {
var results = [];
for (var i = 0; i < elements.length; i++) {
if (hasClass(elements[i], className) == include)
results.push(elements[i]);
}
return results;
}
function on(obj, eventName, func) {
if (obj.addEventListener) {
obj.addEventListener(eventName, func, false);
} else if (obj.attachEvent) {
obj.attachEvent(eventName, func);
}
}
function off(obj, eventName, func) {
if (obj.removeEventListener)
obj.removeEventListener(eventName, func, false);
else if (obj.detachEvent) {
obj.detachEvent(eventName, func);
}
}
// Translate array of values to top/right/bottom/left, as usual with
// the "padding" CSS property
// https://developer.mozilla.org/en-US/docs/Web/CSS/padding
function unpackPadding(value) {
if (typeof(value) === "number")
value = [value];
if (value.length === 1) {
return {top: value[0], right: value[0], bottom: value[0], left: value[0]};
}
if (value.length === 2) {
return {top: value[0], right: value[1], bottom: value[0], left: value[1]};
}
if (value.length === 3) {
return {top: value[0], right: value[1], bottom: value[2], left: value[1]};
}
if (value.length === 4) {
return {top: value[0], right: value[1], bottom: value[2], left: value[3]};
}
}
// Convert an unpacked padding object to a CSS value
function paddingToCss(paddingObj) {
return paddingObj.top + "px " + paddingObj.right + "px " + paddingObj.bottom + "px " + paddingObj.left + "px";
}
// Makes a number suitable for CSS
function px(x) {
if (typeof(x) === "number")
return x + "px";
else
return x;
}
// Retrieves runtime widget sizing information for an element.
// The return value is either null, or an object with fill, padding,
// defaultWidth, defaultHeight fields.
function sizingPolicy(el) {
var sizingEl = document.querySelector("script[data-for='" + el.id + "'][type='application/htmlwidget-sizing']");
if (!sizingEl)
return null;
var sp = JSON.parse(sizingEl.textContent || sizingEl.text || "{}");
if (viewerMode) {
return sp.viewer;
} else {
return sp.browser;
}
}
function initSizing(el) {
var sizing = sizingPolicy(el);
if (!sizing)
return;
var cel = document.getElementById("htmlwidget_container");
if (!cel)
return;
if (typeof(sizing.padding) !== "undefined") {
document.body.style.margin = "0";
document.body.style.padding = paddingToCss(unpackPadding(sizing.padding));
}
if (sizing.fill) {
document.body.style.overflow = "hidden";
document.body.style.width = "100%";
document.body.style.height = "100%";
document.documentElement.style.width = "100%";
document.documentElement.style.height = "100%";
if (cel) {
cel.style.position = "absolute";
var pad = unpackPadding(sizing.padding);
cel.style.top = pad.top + "px";
cel.style.right = pad.right + "px";
cel.style.bottom = pad.bottom + "px";
cel.style.left = pad.left + "px";
el.style.width = "100%";
el.style.height = "100%";
}
return {
getWidth: function() { return cel.offsetWidth; },
getHeight: function() { return cel.offsetHeight; }
};
} else {
el.style.width = px(sizing.width);
el.style.height = px(sizing.height);
return {
getWidth: function() { return el.offsetWidth; },
getHeight: function() { return el.offsetHeight; }
};
}
}
// Default implementations for methods
var defaults = {
find: function(scope) {
return querySelectorAll(scope, "." + this.name);
},
renderError: function(el, err) {
var $el = $(el);
this.clearError(el);
// Add all these error classes, as Shiny does
var errClass = "shiny-output-error";
if (err.type !== null) {
// use the classes of the error condition as CSS class names
errClass = errClass + " " + $.map(asArray(err.type), function(type) {
return errClass + "-" + type;
}).join(" ");
}
errClass = errClass + " htmlwidgets-error";
// Is el inline or block? If inline or inline-block, just display:none it
// and add an inline error.
var display = $el.css("display");
$el.data("restore-display-mode", display);
if (display === "inline" || display === "inline-block") {
$el.hide();
if (err.message !== "") {
var errorSpan = $("<span>").addClass(errClass);
errorSpan.text(err.message);
$el.after(errorSpan);
}
} else if (display === "block") {
// If block, add an error just after the el, set visibility:none on the
// el, and position the error to be on top of the el.
// Mark it with a unique ID and CSS class so we can remove it later.
$el.css("visibility", "hidden");
if (err.message !== "") {
var errorDiv = $("<div>").addClass(errClass).css("position", "absolute")
.css("top", el.offsetTop)
.css("left", el.offsetLeft)
// setting width can push out the page size, forcing otherwise
// unnecessary scrollbars to appear and making it impossible for
// the element to shrink; so use max-width instead
.css("maxWidth", el.offsetWidth)
.css("height", el.offsetHeight);
errorDiv.text(err.message);
$el.after(errorDiv);
// Really dumb way to keep the size/position of the error in sync with
// the parent element as the window is resized or whatever.
var intId = setInterval(function() {
if (!errorDiv[0].parentElement) {
clearInterval(intId);
return;
}
errorDiv
.css("top", el.offsetTop)
.css("left", el.offsetLeft)
.css("maxWidth", el.offsetWidth)
.css("height", el.offsetHeight);
}, 500);
}
}
},
clearError: function(el) {
var $el = $(el);
var display = $el.data("restore-display-mode");
$el.data("restore-display-mode", null);
if (display === "inline" || display === "inline-block") {
if (display)
$el.css("display", display);
$(el.nextSibling).filter(".htmlwidgets-error").remove();
} else if (display === "block"){
$el.css("visibility", "inherit");
$(el.nextSibling).filter(".htmlwidgets-error").remove();
}
},
sizing: {}
};
// Called by widget bindings to register a new type of widget. The definition
// object can contain the following properties:
// - name (required) - A string indicating the binding name, which will be
// used by default as the CSS classname to look for.
// - initialize (optional) - A function(el) that will be called once per
// widget element; if a value is returned, it will be passed as the third
// value to renderValue.
// - renderValue (required) - A function(el, data, initValue) that will be
// called with data. Static contexts will cause this to be called once per
// element; Shiny apps will cause this to be called multiple times per
// element, as the data changes.
window.HTMLWidgets.widget = function(definition) {
if (!definition.name) {
throw new Error("Widget must have a name");
}
if (!definition.type) {
throw new Error("Widget must have a type");
}
// Currently we only support output widgets
if (definition.type !== "output") {
throw new Error("Unrecognized widget type '" + definition.type + "'");
}
// TODO: Verify that .name is a valid CSS classname
if (!definition.renderValue) {
throw new Error("Widget must have a renderValue function");
}
// For static rendering (non-Shiny), use a simple widget registration
// scheme. We also use this scheme for Shiny apps/documents that also
// contain static widgets.
window.HTMLWidgets.widgets = window.HTMLWidgets.widgets || [];
// Merge defaults into the definition; don't mutate the original definition.
var staticBinding = extend({}, defaults, definition);
overrideMethod(staticBinding, "find", function(superfunc) {
return function(scope) {
var results = superfunc(scope);
// Filter out Shiny outputs, we only want the static kind
return filterByClass(results, "html-widget-output", false);
};
});
window.HTMLWidgets.widgets.push(staticBinding);
if (shinyMode) {
// Shiny is running. Register the definition as an output binding.
// Merge defaults into the definition; don't mutate the original definition.
// The base object is a Shiny output binding if we're running in Shiny mode,
// or an empty object if we're not.
var shinyBinding = extend(new Shiny.OutputBinding(), defaults, definition);
// Wrap renderValue to handle initialization, which unfortunately isn't
// supported natively by Shiny at the time of this writing.
// NB: shinyBinding.initialize may be undefined, as it's optional.
// Rename initialize to make sure it isn't called by a future version
// of Shiny that does support initialize directly.
shinyBinding._htmlwidgets_initialize = shinyBinding.initialize;
delete shinyBinding.initialize;
overrideMethod(shinyBinding, "find", function(superfunc) {
return function(scope) {
var results = superfunc(scope);
// Only return elements that are Shiny outputs, not static ones
var dynamicResults = results.filter(".html-widget-output");
// It's possible that whatever caused Shiny to think there might be
// new dynamic outputs, also caused there to be new static outputs.
// Since there might be lots of different htmlwidgets bindings, we
// schedule execution for later--no need to staticRender multiple
// times.
if (results.length !== dynamicResults.length)
scheduleStaticRender();
return dynamicResults;
};
});
overrideMethod(shinyBinding, "renderValue", function(superfunc) {
return function(el, data) {
// Resolve strings marked as javascript literals to objects
if (!(data.evals instanceof Array)) data.evals = [data.evals];
for (var i = 0; data.evals && i < data.evals.length; i++) {
window.HTMLWidgets.evaluateStringMember(data.x, data.evals[i]);
}
if (!this.renderOnNullValue) {
if (data.x === null) {
el.style.visibility = "hidden";
return;
} else {
el.style.visibility = "inherit";
}
}
if (!elementData(el, "initialized")) {
initSizing(el);
elementData(el, "initialized", true);
if (this._htmlwidgets_initialize) {
var result = this._htmlwidgets_initialize(el, el.offsetWidth,
el.offsetHeight);
elementData(el, "init_result", result);
}
}
Shiny.renderDependencies(data.deps);
superfunc(el, data.x, elementData(el, "init_result"));
};
});
overrideMethod(shinyBinding, "resize", function(superfunc) {
return function(el, width, height) {
// Shiny can call resize before initialize/renderValue have been
// called, which doesn't make sense for widgets.
if (elementData(el, "initialized")) {
superfunc(el, width, height, elementData(el, "init_result"));
}
};
});
Shiny.outputBindings.register(shinyBinding, shinyBinding.name);
}
};
var scheduleStaticRenderTimerId = null;
function scheduleStaticRender() {
if (!scheduleStaticRenderTimerId) {
scheduleStaticRenderTimerId = setTimeout(function() {
scheduleStaticRenderTimerId = null;
window.HTMLWidgets.staticRender();
}, 1);
}
}
// Render static widgets after the document finishes loading
// Statically render all elements that are of this widget's class
window.HTMLWidgets.staticRender = function() {
var bindings = window.HTMLWidgets.widgets || [];
forEach(bindings, function(binding) {
var matches = binding.find(document.documentElement);
forEach(matches, function(el) {
var sizeObj = initSizing(el, binding);
if (hasClass(el, "html-widget-static-bound"))
return;
el.className = el.className + " html-widget-static-bound";
var initResult;
if (binding.initialize) {
initResult = binding.initialize(el,
sizeObj ? sizeObj.getWidth() : el.offsetWidth,
sizeObj ? sizeObj.getHeight() : el.offsetHeight
);
}
if (binding.resize) {
var lastSize = {};
var resizeHandler = function(e) {
var size = {
w: sizeObj ? sizeObj.getWidth() : el.offsetWidth,
h: sizeObj ? sizeObj.getHeight() : el.offsetHeight
};
if (size.w === 0 && size.h === 0)
return;
if (size.w === lastSize.w && size.h === lastSize.h)
return;
lastSize = size;
binding.resize(el, size.w, size.h, initResult);
};
on(window, "resize", resizeHandler);
// This is needed for cases where we're running in a Shiny
// app, but the widget itself is not a Shiny output, but
// rather a simple static widget. One example of this is
// an rmarkdown document that has runtime:shiny and widget
// that isn't in a render function. Shiny only knows to
// call resize handlers for Shiny outputs, not for static
// widgets, so we do it ourselves.
if (window.jQuery) {
window.jQuery(document).on("shown", resizeHandler);
window.jQuery(document).on("hidden", resizeHandler);
}
// This is needed for the specific case of ioslides, which
// flips slides between display:none and display:block.
// Ideally we would not have to have ioslide-specific code
// here, but rather have ioslides raise a generic event,
// but the rmarkdown package just went to CRAN so the
// window to getting that fixed may be long.
if (window.addEventListener) {
// It's OK to limit this to window.addEventListener
// browsers because ioslides itself only supports
// such browsers.
on(document, "slideenter", resizeHandler);
on(document, "slideleave", resizeHandler);
}
}
var scriptData = document.querySelector("script[data-for='" + el.id + "'][type='application/json']");
if (scriptData) {
var data = JSON.parse(scriptData.textContent || scriptData.text);
// Resolve strings marked as javascript literals to objects
if (!(data.evals instanceof Array)) data.evals = [data.evals];
for (var k = 0; data.evals && k < data.evals.length; k++) {
window.HTMLWidgets.evaluateStringMember(data.x, data.evals[k]);
}
binding.renderValue(el, data.x, initResult);
}
});
});
}
// Wait until after the document has loaded to render the widgets.
if (document.addEventListener) {
document.addEventListener("DOMContentLoaded", function() {
document.removeEventListener("DOMContentLoaded", arguments.callee, false);
window.HTMLWidgets.staticRender();
}, false);
} else if (document.attachEvent) {
document.attachEvent("onreadystatechange", function() {
if (document.readyState === "complete") {
document.detachEvent("onreadystatechange", arguments.callee);
window.HTMLWidgets.staticRender();
}
});
}
window.HTMLWidgets.getAttachmentUrl = function(depname, key) {
// If no key, default to the first item
if (typeof(key) === "undefined")
key = 1;
var link = document.getElementById(depname + "-" + key + "-attachment");
if (!link) {
throw new Error("Attachment " + depname + "/" + key + " not found in document");
}
return link.getAttribute("href");
};
window.HTMLWidgets.dataframeToD3 = function(df) {
var names = [];
var length;
for (var name in df) {
if (df.hasOwnProperty(name))
names.push(name);
if (typeof(df[name]) !== "object" || typeof(df[name].length) === "undefined") {
throw new Error("All fields must be arrays");
} else if (typeof(length) !== "undefined" && length !== df[name].length) {
throw new Error("All fields must be arrays of the same length");
}
length = df[name].length;
}
var results = [];
var item;
for (var row = 0; row < length; row++) {
item = {};
for (var col = 0; col < names.length; col++) {
item[names[col]] = df[names[col]][row];
}
results.push(item);
}
return results;
};
window.HTMLWidgets.transposeArray2D = function(array) {
var newArray = array[0].map(function(col, i) {
return array.map(function(row) {
return row[i]
})
});
return newArray;
};
// Split value at splitChar, but allow splitChar to be escaped
// using escapeChar. Any other characters escaped by escapeChar
// will be included as usual (including escapeChar itself).
function splitWithEscape(value, splitChar, escapeChar) {
var results = [];
var escapeMode = false;
var currentResult = "";
for (var pos = 0; pos < value.length; pos++) {
if (!escapeMode) {
if (value[pos] === splitChar) {
results.push(currentResult);
currentResult = "";
} else if (value[pos] === escapeChar) {
escapeMode = true;
} else {
currentResult += value[pos];
}
} else {
currentResult += value[pos];
escapeMode = false;
}
}
if (currentResult !== "") {
results.push(currentResult);
}
return results;
}
// Function authored by Yihui/JJ Allaire
window.HTMLWidgets.evaluateStringMember = function(o, member) {
var parts = splitWithEscape(member, '.', '\\');
for (var i = 0, l = parts.length; i < l; i++) {
var part = parts[i];
// part may be a character or 'numeric' member name
if (o !== null && typeof o === "object" && part in o) {
if (i == (l - 1)) { // if we are at the end of the line then evalulate
if (typeof o[part] === "string")
o[part] = eval("(" + o[part] + ")");
} else { // otherwise continue to next embedded object
o = o[part];
}
}
}
};
})();

File diff suppressed because one or more lines are too long

View File

@ -1,11 +0,0 @@
.str_view ul, .str_view li {
list-style: none;
padding: 0;
margin: 0.5em 0;
font-family: monospace;
}
.str_view .match {
border: 1px solid #ccc;
background-color: #eee;
}

View File

@ -1,17 +0,0 @@
HTMLWidgets.widget({
name: 'str_view',
type: 'output',
initialize: function(el, width, height) {
},
renderValue: function(el, x, instance) {
el.innerHTML = x.html;
},
resize: function(el, width, height, instance) {
}
});

Some files were not shown because too many files have changed in this diff Show More