Problem Set 1

Task 1: Visualization

# Packages required by this problem set.
packages <- c(
  "tidyverse", "gapminder", "fst", "viridis", "ggridges",
  "modelsummary", "RColorBrewer", "effects"
)

# Install only the packages that cannot be loaded. `requireNamespace()` checks
# each named package directly instead of scanning the whole library with
# `installed.packages()`.
new_packages <- packages[
  !vapply(packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package. `invisible()` suppresses the list of search paths that
# a bare `lapply()` call would otherwise print once per package.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
## 
## Loading required package: carData
## 
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "gapminder" "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "fst"       "gapminder" "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "viridis"     "viridisLite" "fst"         "gapminder"   "lubridate"  
##  [6] "forcats"     "stringr"     "dplyr"       "purrr"       "readr"      
## [11] "tidyr"       "tibble"      "ggplot2"     "tidyverse"   "stats"      
## [16] "graphics"    "grDevices"   "utils"       "datasets"    "methods"    
## [21] "base"       
## 
## [[5]]
##  [1] "ggridges"    "viridis"     "viridisLite" "fst"         "gapminder"  
##  [6] "lubridate"   "forcats"     "stringr"     "dplyr"       "purrr"      
## [11] "readr"       "tidyr"       "tibble"      "ggplot2"     "tidyverse"  
## [16] "stats"       "graphics"    "grDevices"   "utils"       "datasets"   
## [21] "methods"     "base"       
## 
## [[6]]
##  [1] "modelsummary" "ggridges"     "viridis"      "viridisLite"  "fst"         
##  [6] "gapminder"    "lubridate"    "forcats"      "stringr"      "dplyr"       
## [11] "purrr"        "readr"        "tidyr"        "tibble"       "ggplot2"     
## [16] "tidyverse"    "stats"        "graphics"     "grDevices"    "utils"       
## [21] "datasets"     "methods"      "base"        
## 
## [[7]]
##  [1] "RColorBrewer" "modelsummary" "ggridges"     "viridis"      "viridisLite" 
##  [6] "fst"          "gapminder"    "lubridate"    "forcats"      "stringr"     
## [11] "dplyr"        "purrr"        "readr"        "tidyr"        "tibble"      
## [16] "ggplot2"      "tidyverse"    "stats"        "graphics"     "grDevices"   
## [21] "utils"        "datasets"     "methods"      "base"        
## 
## [[8]]
##  [1] "effects"      "carData"      "RColorBrewer" "modelsummary" "ggridges"    
##  [6] "viridis"      "viridisLite"  "fst"          "gapminder"    "lubridate"   
## [11] "forcats"      "stringr"      "dplyr"        "purrr"        "readr"       
## [16] "tidyr"        "tibble"       "ggplot2"      "tidyverse"    "stats"       
## [21] "graphics"     "grDevices"    "utils"        "datasets"     "methods"     
## [26] "base"
# Load the gapminder data set and keep four columns: country, year,
# life expectancy and GDP per capita.
data(gapminder)
gapminder_subset <- dplyr::select(
  gapminder,
  country, year, lifeExp, gdpPercap
)

# Show the first rows of the reduced table.
head(gapminder_subset)
## # A tibble: 6 × 4
##   country      year lifeExp gdpPercap
##   <fct>       <int>   <dbl>     <dbl>
## 1 Afghanistan  1952    28.8      779.
## 2 Afghanistan  1957    30.3      821.
## 3 Afghanistan  1962    32.0      853.
## 4 Afghanistan  1967    34.0      836.
## 5 Afghanistan  1972    36.1      740.
## 6 Afghanistan  1977    38.4      786.
# Add total GDP (population times GDP per capita) as a new column.
gapminder <- mutate(gapminder, total_gdp = pop * gdpPercap)

# Show the first rows, now including `total_gdp`.
head(gapminder)
## # A tibble: 6 × 7
##   country     continent  year lifeExp      pop gdpPercap    total_gdp
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>        <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.  6567086330.
## 2 Afghanistan Asia       1957    30.3  9240934      821.  7585448670.
## 3 Afghanistan Asia       1962    32.0 10267083      853.  8758855797.
## 4 Afghanistan Asia       1967    34.0 11537966      836.  9648014150.
## 5 Afghanistan Asia       1972    36.1 13079460      740.  9678553274.
## 6 Afghanistan Asia       1977    38.4 14880372      786. 11697659231.
# Mean life expectancy per continent, ignoring missing values.
gapminder %>%
  group_by(continent) %>%
  summarize(
    mean_lifeExp = mean(lifeExp, na.rm = TRUE)
  )
## # A tibble: 5 × 2
##   continent mean_lifeExp
##   <fct>            <dbl>
## 1 Africa            48.9
## 2 Americas          64.7
## 3 Asia              60.1
## 4 Europe            71.9
## 5 Oceania           74.3
# Scatter plot of life expectancy against GDP per capita, one point per row of
# `gapminder`, colored by continent.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(mapping = aes(color = continent), alpha = 0.9) +
  theme_minimal() +
  labs(
    title = "Life Expectancy Based on GDP per Capita",
    x = "GDP per Capita",
    y = "Life Expectancy",
    color = "Continent"
  )

Task 2: Descriptive Statistics Interpretation

# Per-continent means of life expectancy and GDP per capita, ignoring missing
# values. `.names` yields the columns `mean_lifeExp` and `mean_gdpPercap`.
summary_mean <- gapminder %>%
  group_by(continent) %>%
  summarize(
    across(
      c(lifeExp, gdpPercap),
      function(values) mean(values, na.rm = TRUE),
      .names = "mean_{.col}"
    )
  )

# Display the summary table.
print(summary_mean)
## # A tibble: 5 × 3
##   continent mean_lifeExp mean_gdpPercap
##   <fct>            <dbl>          <dbl>
## 1 Africa            48.9          2194.
## 2 Americas          64.7          7136.
## 3 Asia              60.1          7902.
## 4 Europe            71.9         14469.
## 5 Oceania           74.3         18622.

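The table shows a strong positive correlation: as mean GDP per capita increases across continents, mean life expectancy tends to increase as well. The one exception is Asia, whose mean GDP per capita (7902) is higher than that of the Americas (7136) but whose mean life expectancy is lower (60.1 vs. 64.7).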

Task 3: Printing

# Print the completion message.
print("I did it!")
## [1] "I did it!"