
tidyverse
magrittr
install.packages(c("tidyverse", "magrittr"))
library(tidyverse)
library(magrittr)
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
## ✔ tibble 1.3.4 ✔ dplyr 0.7.4
## ✔ tidyr 0.7.2 ✔ stringr 1.2.0
## ✔ readr 1.1.1 ✔ forcats 0.2.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
tidyverse
data()
starwars dataset
starwars
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color
## <chr> <int> <dbl> <chr> <chr> <chr>
## 1 Luke Skywalker 172 77 blond fair blue
## 2 C-3PO 167 75 <NA> gold yellow
## 3 R2-D2 96 32 <NA> white, blue red
## 4 Darth Vader 202 136 none white yellow
## 5 Leia Organa 150 49 brown light brown
## 6 Owen Lars 178 120 brown, grey light blue
## 7 Beru Whitesun lars 165 75 brown light blue
## 8 R5-D4 97 32 <NA> white, red red
## 9 Biggs Darklighter 183 84 black light brown
## 10 Obi-Wan Kenobi 182 77 auburn, white fair blue-gray
## # ... with 77 more rows, and 7 more variables: birth_year <dbl>,
## # gender <chr>, homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
> starwars
> print(starwars)
> str(starwars)
> head(starwars) # first 6 rows
glimpse(starwars)
## Observations: 87
## Variables: 13
## $ name <chr> "Luke Skywalker", "C-3PO", "R2-D2", "Darth Vader", ...
## $ height <int> 172, 167, 96, 202, 150, 178, 165, 97, 183, 182, 188...
## $ mass <dbl> 77.0, 75.0, 32.0, 136.0, 49.0, 120.0, 75.0, 32.0, 8...
## $ hair_color <chr> "blond", NA, NA, "none", "brown", "brown, grey", "b...
## $ skin_color <chr> "fair", "gold", "white, blue", "white", "light", "l...
## $ eye_color <chr> "blue", "yellow", "red", "yellow", "brown", "blue",...
## $ birth_year <dbl> 19.0, 112.0, 33.0, 41.9, 19.0, 52.0, 47.0, NA, 24.0...
## $ gender <chr> "male", NA, NA, "male", "female", "male", "female",...
## $ homeworld <chr> "Tatooine", "Tatooine", "Naboo", "Tatooine", "Alder...
## $ species <chr> "Human", "Droid", "Droid", "Human", "Human", "Human...
## $ films <list> [<"Revenge of the Sith", "Return of the Jedi", "Th...
## $ vehicles <list> [<"Snowspeeder", "Imperial Speeder Bike">, <>, <>,...
## $ starships <list> [<"X-wing", "Imperial shuttle">, <>, <>, "TIE Adva...
Click on the variable name in the Environment pane.
This will execute the following code:
View(mtcars)

%>%
sum(1:8) %>%
sqrt()
## [1] 6
tidy-ness

table %>%
gather(`1999`, `2000`, key = "year", value = "cases")
filter(starwars, height < 90)
## # A tibble: 3 x 13
## name height mass hair_color skin_color eye_color
## <chr> <int> <dbl> <chr> <chr> <chr>
## 1 Yoda 66 17 white green brown
## 2 Wicket Systri Warrick 88 20 brown brown brown
## 3 Ratts Tyerell 79 15 none grey, blue unknown
## # ... with 7 more variables: birth_year <dbl>, gender <chr>,
## # homeworld <chr>, species <chr>, films <list>, vehicles <list>,
## # starships <list>
filter(starwars, height < 90) %>%
select(name, gender, species, height)
## # A tibble: 3 x 4
## name gender species height
## <chr> <chr> <chr> <int>
## 1 Yoda male Yoda's species 66
## 2 Wicket Systri Warrick male Ewok 88
## 3 Ratts Tyerell male Aleena 79
filter(starwars, height < 90) %>%
select(name, gender, species, height) %>%
arrange(height)
## # A tibble: 3 x 4
## name gender species height
## <chr> <chr> <chr> <int>
## 1 Yoda male Yoda's species 66
## 2 Ratts Tyerell male Aleena 79
## 3 Wicket Systri Warrick male Ewok 88
starwars %>%
na.omit() %>%
group_by(species) %>%
summarize(avg_mass = mean(mass))
## # A tibble: 11 x 2
## species avg_mass
## <chr> <dbl>
## 1 Cerean 82.00000
## 2 Ewok 20.00000
## 3 Gungan 66.00000
## 4 Human 81.01111
## 5 Kel Dor 80.00000
## 6 Mirialan 53.10000
## 7 Mon Calamari 83.00000
## 8 Trandoshan 113.00000
## 9 Twi'lek 55.00000
## 10 Wookiee 112.00000
## 11 Zabrak 80.00000
starwars %>%
na.omit() %>%
group_by(species, gender) %>%
select(name, gender, species, mass) %>%
mutate(avg_mass = mean(mass))
## # A tibble: 29 x 5
## # Groups: species, gender [12]
## name gender species mass avg_mass
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Luke Skywalker male Human 77 85.94667
## 2 Darth Vader male Human 136 85.94667
## 3 Leia Organa female Human 49 56.33333
## 4 Owen Lars male Human 120 85.94667
## 5 Beru Whitesun lars female Human 75 56.33333
## 6 Biggs Darklighter male Human 84 85.94667
## 7 Obi-Wan Kenobi male Human 77 85.94667
## 8 Anakin Skywalker male Human 84 85.94667
## 9 Chewbacca male Wookiee 112 112.00000
## 10 Han Solo male Human 80 85.94667
## # ... with 19 more rows
starwars %>%
count(species) %>%
arrange(desc(n))
## # A tibble: 38 x 2
## species n
## <chr> <int>
## 1 Human 35
## 2 Droid 5
## 3 <NA> 5
## 4 Gungan 3
## 5 Kaminoan 2
## 6 Mirialan 2
## 7 Twi'lek 2
## 8 Wookiee 2
## 9 Zabrak 2
## 10 Aleena 1
## # ... with 28 more rows
ggplot
plot(starwars$height, type='p', col='red', pch=16)
| Factor | Scalar/Numeric |
|---|---|
| discrete | continuous |
| Example: Human, Droid, Wookiee | Example: 0.7, 1.2, 3.4 |
nums <- c(0.7, 1.2, 3.4)
factor_nums <- as.factor(nums)
levels(factor_nums)
## [1] "0.7" "1.2" "3.4"
nums + 1
## [1] 1.7 2.2 4.4
factor_nums + 1
## Warning in Ops.factor(factor_nums, 1): '+' not meaningful for factors
## [1] NA NA NA
ggplot function
ggplot(data=starwars, aes(x=height, y=mass)) + ...
... + geom_point(), + geom_boxplot(), etc.
ggplot(subset(starwars, species %in% c('Droid', 'Human', "Gungan")),
aes(x=species, y=height)) +
geom_boxplot()
ggplot(starwars, aes(x=height, y=mass)) +
geom_point(size=5) +
stat_smooth(method='lm')
dplyr to ggplot
starwars %>%
filter(species == 'Human' & gender %in% c('male', 'female')) %>%
ggplot(aes(mass)) +
geom_histogram() +
facet_grid(. ~ gender)
The pipe (%>%) can link many functions