Homework 6 (2.5%) due October 30

# Point the session at the tutorial folder so the relative file name used by
# read_fst() below resolves; getwd() just echoes the resulting directory.
setwd("~/SOC202 Documents/tutorial")
getwd()
## [1] "C:/Users/Adrien/Documents/SOC202 Documents/tutorial"
# fst supplies read_fst()/write_fst(); it is attached here because the data
# are read before the project-wide package list is loaded.
library(fst)
# Load the full ESS file from the working directory into `ess`.
ess <- read_fst("All-ESS-Data.fst")

Mission 1 Launch a new R project and R markdown file. Name it “Lastname_Firstname_Project_202”. Set up your environment with packages you will use.

# Packages used throughout the project ("viridis" was previously listed twice).
packages <- c(
  "tidyverse", "modelsummary", "forcats", "RColorBrewer", "fst", "viridis",
  "knitr", "rmarkdown", "ggridges", "questionr", "flextable", "infer"
) # add any you need here

# Install only the packages that are not yet in the local library.
new_packages <- setdiff(packages, rownames(installed.packages()))
if (length(new_packages) > 0) {
  install.packages(new_packages)
}

# Attach every package. library() returns the search path, so an unwrapped
# lapply() prints one full listing per package; invisible() suppresses that
# while leaving the usual start-up messages intact.
invisible(lapply(packages, library, character.only = TRUE))
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
## 
## 
## Attaching package: 'flextable'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     compose

Mission 2 Filter to your country of interest and save the dataset.

# Keep only the rows whose country code is "FI" (Finland).
finland_data <- filter(ess, cntry == "FI")

# Persist the subset to disk, then echo the current working directory.
write_fst(x = finland_data, path = "~/SOC202 Documents/tutorial/finland_data.fst")
getwd()
## [1] "C:/Users/Adrien/Documents/SOC202 Documents/tutorial/Homework_6_Project"

Mission 3 Clean your environment and load in the filtered dataset.

# Mission 3 asks for a clean environment: drop every object, then let gc()
# report/release the memory that the full ESS file was holding.
rm(list = ls())
gc()
##           used (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells 1288699 68.9    2401321   128.3    1736314    92.8
## Vcells 2163101 16.6 1504684699 11479.9 1357464050 10356.7
# Read back the exact file that write_fst() saved in Mission 2. The path
# previously pointed at ".../Project/finland_data.fst", which is not where the
# filtered data were written.
df <- read_fst("~/SOC202 Documents/tutorial/finland_data.fst")

Mission 4 Produce and save a data summary output (using `datasummary_skim()` from the modelsummary package) for potential outcomes of interest on a similar scale (e.g., 0-10, or 1 to 6, or binary). Add a title. You can do so while coding (explore the package documentation for flextable and/or modelsummary) or add it directly in the Word file. The title should be something like: “Table 1: Descriptive Statistics for Outcome Variables”. You can alter the title as you see fit.

# Survey year for each value of essround: position i holds the year assigned
# to essround == i.
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)

# Vectorised lookup instead of a loop over 1:10. match() yields NA for any
# essround that is missing or outside 1..length(replacements), so those rows
# end up with year = NA exactly as they did with the loop.
df$year <- replacements[match(df$essround, seq_along(replacements))]

finland_data <- df

# Recode the non-substantive answer codes to NA before summarising.
# stfdem and atchctr share the codes 77/88/99; iplylfr uses 7/8/9.
finland_data_table_subset <- mutate(
  finland_data,
  across(
    c(stfdem, atchctr),
    function(x) ifelse(x %in% c(77, 88, 99), NA, x)
  ),
  iplylfr = ifelse(iplylfr %in% c(7, 8, 9), NA, iplylfr)
)

# Descriptive statistics for the three outcome variables, as a flextable.
# histogram = FALSE: inline histograms are not supported for flextable output,
# and leaving the default on makes datasummary_skim() warn on every call.
summary_table <- datasummary_skim(
  finland_data_table_subset %>% select(stfdem, atchctr, iplylfr),
  histogram = FALSE,
  output = "flextable"
)
summary_table

Unique (#)

Missing (%)

Mean

SD

Min

Median

Max

stfdem

12

3

6.5

2.0

0.0

7.0

10.0

atchctr

12

73

8.6

1.6

0.0

9.0

10.0

iplylfr

7

8

1.8

0.8

1.0

2.0

6.0

# Give the outcome variables readable labels so the table rows are
# self-explanatory in the exported document.
finland_data_v2 <- finland_data_table_subset %>%
  rename(
    `Satisfaction with Democracy` = stfdem,
    `Emotional Attachment to Country` = atchctr,
    `Loyalty to Friends` = iplylfr
  )

# Same summary as before, on the relabelled columns.
# histogram = FALSE: inline histograms are not supported for flextable output,
# and leaving the default on makes datasummary_skim() warn on every call.
summary_table_v2 <- datasummary_skim(
  finland_data_v2 %>%
    select(
      `Satisfaction with Democracy`,
      `Emotional Attachment to Country`,
      `Loyalty to Friends`
    ),
  histogram = FALSE,
  output = "flextable"
)
summary_table_v2

Unique (#)

Missing (%)

Mean

SD

Min

Median

Max

Satisfaction with Democracy

12

3

6.5

2.0

0.0

7.0

10.0

Emotional Attachment to Country

12

73

8.6

1.6

0.0

9.0

10.0

Loyalty to Friends

7

8

1.8

0.8

1.0

2.0

6.0

# Export the labelled summary table to Word (relative to the working directory).
# save_as_docx() has no width/height parameters: everything passed through
# `...` is expected to be a flextable, so the former `width = 7.0,
# height = 7.0` never sized anything. If a specific page size is needed, it is
# configured through the `pr_section` argument instead.
flextable::save_as_docx(summary_table_v2, path = "summary_table_v2.docx")
# Install the TinyTeX LaTeX distribution only when it is not already the one
# in use; an unconditional install_tinytex(force = TRUE) reinstalls it on
# every run of the script.
if (!tinytex::is_tinytex()) {
  tinytex::install_tinytex(force = TRUE)
}
## tlmgr install colortbl environ makecell multirow pdflscape tabu threeparttable threeparttablex trimspaces ulem varwidth wrapfig
library(tinytex)
# Skip flextable's font availability checks, then render the table through
# the PDF preview.
set_flextable_defaults(fonts_ignore=TRUE)
print(summary_table_v2, preview = "pdf")
## a flextable object.
## col_keys: ` `, `Unique (#)`, `Missing (%)`, `Mean`, `SD`, `Min`, `Median`, `Max` 
## header has 1 row(s) 
## body has 3 row(s) 
## original dataset sample: 
##                                   Unique (#) Missing (%) Mean  SD Min Median
## 1     Satisfaction with Democracy         12           3  6.5 2.0 0.0    7.0
## 2 Emotional Attachment to Country         12          73  8.6 1.6 0.0    9.0
## 3              Loyalty to Friends          7           8  1.8 0.8 1.0    2.0
##    Max
## 1 10.0
## 2 10.0
## 3  6.0

Mission 11 Produce a visualization that compares the average for an outcome of interest to all other countries and arranges the values in order – this lets you compare quickly and visually to get a sense of where your country “stands” (i.e., does it seem to be an outlier, fairly “average”, or somewhere in between). You can also play around with computing averages by categorical variables or, say, for only Millennials, to see where your country stands. This is still very exploratory but can help guide you towards what is worth exploring next.

# The environment was cleared earlier, so the full ESS file is loaded again
# from the tutorial folder.
setwd("~/SOC202 Documents/tutorial")
getwd()
## [1] "C:/Users/Adrien/Documents/SOC202 Documents/tutorial"
ess <- read_fst("All-ESS-Data.fst")

# Mean of trstsci per country.
# NA values are dropped *before* grouping, together with the 77/88/99 codes.
# Otherwise a country with no valid answers gets a NaN mean, which ggplot then
# discards with a "Removed 12 rows containing missing values" warning for each
# layer (and which may still reserve empty slots on the x axis).
country_averages <- ess %>%
  filter(!is.na(trstsci), !(trstsci %in% c(77, 88, 99))) %>%
  group_by(cntry) %>%
  summarize(avg_trstsci = mean(trstsci), .groups = "drop") %>%
  # Order the country factor by its average so the points rise left to right.
  mutate(cntry = reorder(cntry, avg_trstsci))

# One labelled point per country; the x-axis tick labels are hidden because
# each point already carries its country code as a text label.
ps <- ggplot(country_averages, aes(x = cntry, y = avg_trstsci, label = cntry)) +
  geom_point(aes(color = avg_trstsci), size = 5) +
  geom_text(nudge_y = 0.2, size = 3) +
  labs(
    x = "Country",
    y = "Average Trust in Scientists",
    title = "Average Trust in Scientists by Country"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text.x = element_blank(),
    axis.title.x = element_text(face = "bold", size = 14)
  )

ps