library(readxl)
deaths <- read_excel("~/Graduate_Seminar_Presentation/deaths.xlsx")
deaths
## # A tibble: 18 x 6
## `Lots of people` X__1 X__2 X__3
## <chr> <chr> <chr> <chr>
## 1 simply cannot resist writing <NA> <NA> <NA>
## 2 at the top <NA>
## 3 or merging <NA> <NA>
## 4 Name Profession Age Has kids
## 5 David Bowie musician 69 TRUE
## 6 Carrie Fisher actor 60 TRUE
## 7 Chuck Berry musician 90 TRUE
## 8 Bill Paxton actor 61 TRUE
## 9 Prince musician 57 TRUE
## 10 Alan Rickman actor 69 FALSE
## 11 Florence Henderson actor 82 TRUE
## 12 Harper Lee author 89 FALSE
## 13 Zsa Zsa Gábor actor 99 TRUE
## 14 George Michael musician 53 FALSE
## 15 Some <NA> <NA> <NA>
## 16 <NA> also like to write stuff <NA> <NA>
## 17 <NA> <NA> at the bottom,
## 18 <NA> <NA> <NA> <NA>
## # ... with 2 more variables: X__4 <chr>, X__5 <chr>
library(readxl)
deaths <- read_excel("~/Graduate_Seminar_Presentation/deaths.xlsx",
range = cell_rows(5:15))
deaths
## # A tibble: 10 x 6
## Name Profession Age `Has kids` `Date of birth`
## <chr> <chr> <dbl> <lgl> <dttm>
## 1 David Bowie musician 69 TRUE 1947-01-08
## 2 Carrie Fisher actor 60 TRUE 1956-10-21
## 3 Chuck Berry musician 90 TRUE 1926-10-18
## 4 Bill Paxton actor 61 TRUE 1955-05-17
## 5 Prince musician 57 TRUE 1958-06-07
## 6 Alan Rickman actor 69 FALSE 1946-02-21
## 7 Florence Henderson actor 82 TRUE 1934-02-14
## 8 Harper Lee author 89 FALSE 1926-04-28
## 9 Zsa Zsa Gábor actor 99 TRUE 1917-02-06
## 10 George Michael musician 53 FALSE 1963-06-25
## # ... with 1 more variables: `Date of death` <dttm>
Create a new column: birthplace
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
deaths %>%
mutate(birthplace = c("UK", "US", "US", "US", "US",
"UK", "US", "US", "Hungary",
"UK"))
## # A tibble: 10 x 7
## Name Profession Age `Has kids` `Date of birth`
## <chr> <chr> <dbl> <lgl> <dttm>
## 1 David Bowie musician 69 TRUE 1947-01-08
## 2 Carrie Fisher actor 60 TRUE 1956-10-21
## 3 Chuck Berry musician 90 TRUE 1926-10-18
## 4 Bill Paxton actor 61 TRUE 1955-05-17
## 5 Prince musician 57 TRUE 1958-06-07
## 6 Alan Rickman actor 69 FALSE 1946-02-21
## 7 Florence Henderson actor 82 TRUE 1934-02-14
## 8 Harper Lee author 89 FALSE 1926-04-28
## 9 Zsa Zsa Gábor actor 99 TRUE 1917-02-06
## 10 George Michael musician 53 FALSE 1963-06-25
## # ... with 2 more variables: `Date of death` <dttm>, birthplace <chr>
Most of the celebrities are from US.
library(gapminder)
gapminder
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <fctr> <fctr> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
## 7 Afghanistan Asia 1982 39.854 12881816 978.0114
## 8 Afghanistan Asia 1987 40.822 13867957 852.3959
## 9 Afghanistan Asia 1992 41.674 16317921 649.3414
## 10 Afghanistan Asia 1997 41.763 22227415 635.3414
## # ... with 1,694 more rows
gapminder %>%
group_by(country) %>%
select(lifeExp, pop, gdpPercap) %>%
summarize_all(funs(min, max, mean, sd, median))
## Adding missing grouping variables: `country`
## # A tibble: 142 x 16
## country lifeExp_min pop_min gdpPercap_min lifeExp_max pop_max
## <fctr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan 28.801 8425333 635.3414 43.828 31889923
## 2 Albania 55.230 1282697 1601.0561 76.423 3600523
## 3 Algeria 43.077 9279525 2449.0082 72.301 33333216
## 4 Angola 30.015 4232095 2277.1409 42.731 12420476
## 5 Argentina 62.485 17876956 5911.3151 75.320 40301927
## 6 Australia 69.120 8691212 10039.5956 81.235 20434176
## 7 Austria 66.800 6927772 6137.0765 79.829 8199783
## 8 Bahrain 50.939 120447 9867.0848 75.635 708573
## 9 Bangladesh 37.484 46886859 630.2336 64.062 150448339
## 10 Belgium 68.000 8730405 8343.1051 79.441 10392226
## # ... with 132 more rows, and 10 more variables: gdpPercap_max <dbl>,
## # lifeExp_mean <dbl>, pop_mean <dbl>, gdpPercap_mean <dbl>,
## # lifeExp_sd <dbl>, pop_sd <dbl>, gdpPercap_sd <dbl>,
## # lifeExp_median <dbl>, pop_median <dbl>, gdpPercap_median <dbl>
library(ggplot2)
gapminder %>%
filter(country %in% c("Canada", "United States", "India")) %>%
ggplot(data = .) +
geom_point(aes(x = year, y = lifeExp, size = pop, color = country))
# gapminder %>%
# filter(country %in% c("Canada", "United States", "India")) %>%
# ggplot(data = .) +
# geom_point(aes(x = year, y = lifeExp, size = pop)) +
# facet_wrap(~country)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
ggplot(data = gapminder %>%
filter(continent=="Oceania")) + #Europe Africa
geom_line(aes(x = year, y = gdpPercap, color = country))
ggplotly()
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
library(tidyr)
library(purrr)
##
## Attaching package: 'purrr'
## The following objects are masked from 'package:dplyr':
##
## contains, order_by
by_country <- gapminder %>%
group_by(country, continent) %>%
nest()
by_country
## # A tibble: 142 x 3
## country continent data
## <fctr> <fctr> <list>
## 1 Afghanistan Asia <tibble [12 x 4]>
## 2 Albania Europe <tibble [12 x 4]>
## 3 Algeria Africa <tibble [12 x 4]>
## 4 Angola Africa <tibble [12 x 4]>
## 5 Argentina Americas <tibble [12 x 4]>
## 6 Australia Oceania <tibble [12 x 4]>
## 7 Austria Europe <tibble [12 x 4]>
## 8 Bahrain Asia <tibble [12 x 4]>
## 9 Bangladesh Asia <tibble [12 x 4]>
## 10 Belgium Europe <tibble [12 x 4]>
## # ... with 132 more rows
#Model
country_model <- function(df) {
lm(lifeExp ~ year, data = df)
}
# Fitting model
by_country <- by_country %>%
mutate(model = map(data, country_model))
by_country
## # A tibble: 142 x 4
## country continent data model
## <fctr> <fctr> <list> <list>
## 1 Afghanistan Asia <tibble [12 x 4]> <S3: lm>
## 2 Albania Europe <tibble [12 x 4]> <S3: lm>
## 3 Algeria Africa <tibble [12 x 4]> <S3: lm>
## 4 Angola Africa <tibble [12 x 4]> <S3: lm>
## 5 Argentina Americas <tibble [12 x 4]> <S3: lm>
## 6 Australia Oceania <tibble [12 x 4]> <S3: lm>
## 7 Austria Europe <tibble [12 x 4]> <S3: lm>
## 8 Bahrain Asia <tibble [12 x 4]> <S3: lm>
## 9 Bangladesh Asia <tibble [12 x 4]> <S3: lm>
## 10 Belgium Europe <tibble [12 x 4]> <S3: lm>
## # ... with 132 more rows
library(broom)
by_country %>%
mutate(glance = map(model, glance)) %>%
unnest(glance, .drop = TRUE)
## # A tibble: 142 x 13
## country continent r.squared adj.r.squared sigma statistic
## <fctr> <fctr> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan Asia 0.9477123 0.9424835 1.2227880 181.24941
## 2 Albania Europe 0.9105778 0.9016355 1.9830615 101.82901
## 3 Algeria Africa 0.9851172 0.9836289 1.3230064 661.91709
## 4 Angola Africa 0.8878146 0.8765961 1.4070091 79.13818
## 5 Argentina Americas 0.9955681 0.9951249 0.2923072 2246.36635
## 6 Australia Oceania 0.9796477 0.9776125 0.6206086 481.34586
## 7 Austria Europe 0.9921340 0.9913474 0.4074094 1261.29629
## 8 Bahrain Asia 0.9667398 0.9634138 1.6395865 290.65974
## 9 Bangladesh Asia 0.9893609 0.9882970 0.9766908 929.92637
## 10 Belgium Europe 0.9945406 0.9939946 0.2929025 1821.68840
## # ... with 132 more rows, and 7 more variables: p.value <dbl>, df <int>,
## # logLik <dbl>, AIC <dbl>, BIC <dbl>, deviance <dbl>, df.residual <int>
library(gganimate)
library(ggplot2)
p <- ggplot(gapminder, aes(gdpPercap, lifeExp, size = pop, color = continent, frame = year)) +
geom_point() +
scale_x_log10()
gganimate(p)
sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 8.1 x64 (build 9600)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Canada.1252 LC_CTYPE=English_Canada.1252
## [3] LC_MONETARY=English_Canada.1252 LC_NUMERIC=C
## [5] LC_TIME=English_Canada.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gganimate_0.1.0.9000 broom_0.4.2 purrr_0.2.2.2
## [4] tidyr_0.6.3 plotly_4.7.0 ggplot2_2.2.1
## [7] gapminder_0.2.0 bindrcpp_0.2 dplyr_0.7.2
## [10] readxl_1.0.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.12 cellranger_1.1.0 compiler_3.4.1
## [4] plyr_1.8.4 bindr_0.1 tools_3.4.1
## [7] digest_0.6.12 lattice_0.20-35 nlme_3.1-131
## [10] jsonlite_1.5 evaluate_0.10.1 tibble_1.3.3
## [13] gtable_0.2.0 viridisLite_0.2.0 pkgconfig_2.0.1
## [16] rlang_0.1.1 psych_1.7.5 shiny_1.0.3
## [19] crosstalk_1.0.0 parallel_3.4.1 yaml_2.1.14
## [22] stringr_1.2.0 httr_1.2.1 knitr_1.17
## [25] htmlwidgets_0.9 rprojroot_1.2 grid_3.4.1
## [28] glue_1.1.1 data.table_1.10.4 R6_2.2.2
## [31] foreign_0.8-69 rmarkdown_1.6 reshape2_1.4.2
## [34] magrittr_1.5 backports_1.1.0 scales_0.4.1
## [37] htmltools_0.3.6 mnormt_1.5-5 assertthat_0.2.0
## [40] xtable_1.8-2 mime_0.5 colorspace_1.3-2
## [43] httpuv_1.3.5 labeling_0.3 stringi_1.1.5
## [46] lazyeval_0.2.0 munsell_0.4.3