Homework 6 (2.5%) due October 30
setwd("~/SOC202 Documents/tutorial")
getwd()
## [1] "C:/Users/Adrien/Documents/SOC202 Documents/tutorial"
library(fst)
ess <- read_fst("All-ESS-Data.fst")
Mission 1 Launch a new R project and R markdown file. Name it “Lastname_Firstname_Project_202”. Set up your environment with packages you will use.
packages <- c("tidyverse", "modelsummary", "forcats", "RColorBrewer",
"fst", "viridis", "knitr", "rmarkdown", "ggridges", "viridis", "questionr", "flextable", "infer") # add any you need here
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: viridisLite
##
##
## Attaching package: 'flextable'
##
##
## The following object is masked from 'package:purrr':
##
## compose
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "fstcore" "fst"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[2]]
## [1] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "fstcore" "fst" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [6] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [11] "tidyverse" "fstcore" "fst" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "fstcore" "fst" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[5]]
## [1] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [6] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [11] "ggplot2" "tidyverse" "fstcore" "fst" "stats"
## [16] "graphics" "grDevices" "utils" "datasets" "methods"
## [21] "base"
##
## [[6]]
## [1] "viridis" "viridisLite" "RColorBrewer" "modelsummary" "lubridate"
## [6] "forcats" "stringr" "dplyr" "purrr" "readr"
## [11] "tidyr" "tibble" "ggplot2" "tidyverse" "fstcore"
## [16] "fst" "stats" "graphics" "grDevices" "utils"
## [21] "datasets" "methods" "base"
##
## [[7]]
## [1] "knitr" "viridis" "viridisLite" "RColorBrewer" "modelsummary"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "fstcore" "fst" "stats" "graphics" "grDevices"
## [21] "utils" "datasets" "methods" "base"
##
## [[8]]
## [1] "rmarkdown" "knitr" "viridis" "viridisLite" "RColorBrewer"
## [6] "modelsummary" "lubridate" "forcats" "stringr" "dplyr"
## [11] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [16] "tidyverse" "fstcore" "fst" "stats" "graphics"
## [21] "grDevices" "utils" "datasets" "methods" "base"
##
## [[9]]
## [1] "ggridges" "rmarkdown" "knitr" "viridis" "viridisLite"
## [6] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "fstcore" "fst" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[10]]
## [1] "ggridges" "rmarkdown" "knitr" "viridis" "viridisLite"
## [6] "RColorBrewer" "modelsummary" "lubridate" "forcats" "stringr"
## [11] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [16] "ggplot2" "tidyverse" "fstcore" "fst" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[11]]
## [1] "questionr" "ggridges" "rmarkdown" "knitr" "viridis"
## [6] "viridisLite" "RColorBrewer" "modelsummary" "lubridate" "forcats"
## [11] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [16] "tibble" "ggplot2" "tidyverse" "fstcore" "fst"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[12]]
## [1] "flextable" "questionr" "ggridges" "rmarkdown" "knitr"
## [6] "viridis" "viridisLite" "RColorBrewer" "modelsummary" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "fstcore"
## [21] "fst" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[13]]
## [1] "infer" "flextable" "questionr" "ggridges" "rmarkdown"
## [6] "knitr" "viridis" "viridisLite" "RColorBrewer" "modelsummary"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "fstcore" "fst" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
Mission 2 Filter to your country of interest and save the dataset.
finland_data <- ess %>%
filter(cntry == "FI")
write_fst(finland_data, "~/SOC202 Documents/tutorial/finland_data.fst")
getwd()
## [1] "C:/Users/Adrien/Documents/SOC202 Documents/tutorial/Homework_6_Project"
Mission 3 Clean your environment and load in the filtered dataset.
rm(list=ls()); gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1288699 68.9 2401321 128.3 1736314 92.8
## Vcells 2163101 16.6 1504684699 11479.9 1357464050 10356.7
df <- read_fst("~/SOC202 Documents/Project/finland_data.fst")
Mission 4 Produce and save a data summary output (using data summary skim) for potential outcomes of interest on a similar scale (e.g., 0-10, or 1 to 6, or binary). Add a title. You can do so while coding (explore package information for flextable and/or modelsummary) or add it directly in the word file. Title should be something like: Table 1: Descriptive Statistics for outcome variables. You can alter the title as you see fit.
df$year <- NA
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
for(i in 1:10){
df$year[df$essround == i] <- replacements[i]
}
finland_data <- df
finland_data_table_subset <- finland_data %>%
mutate(
stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem),
atchctr = ifelse(atchctr %in% c(77, 88, 99), NA, atchctr),
iplylfr = ifelse(iplylfr %in% c(7, 8, 9), NA, iplylfr)
)
summary_table <- datasummary_skim(finland_data_table_subset %>% select(stfdem, atchctr, iplylfr), output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
## "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
## ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
## compiled to PDF (via kableExtra) or HTML (via kableExtra or gt). Use
## `histogram=FALSE` to silence this warning.
summary_table
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
stfdem | 12 | 3 | 6.5 | 2.0 | 0.0 | 7.0 | 10.0 |
atchctr | 12 | 73 | 8.6 | 1.6 | 0.0 | 9.0 | 10.0 |
iplylfr | 7 | 8 | 1.8 | 0.8 | 1.0 | 2.0 | 6.0 |
finland_data_v2 <- finland_data_table_subset %>%
rename(
`Satisfaction with Democracy` = stfdem,
`Emotional Attachment to Country` = atchctr,
`Loyalty to Friends` = iplylfr
)
summary_table_v2 <- datasummary_skim(finland_data_v2 %>% select(`Satisfaction with Democracy`,`Emotional Attachment to Country`, `Loyalty to Friends`), output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
## "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
## ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
## compiled to PDF (via kableExtra) or HTML (via kableExtra or gt). Use
## `histogram=FALSE` to silence this warning.
summary_table_v2
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
Satisfaction with Democracy | 12 | 3 | 6.5 | 2.0 | 0.0 | 7.0 | 10.0 |
Emotional Attachment to Country | 12 | 73 | 8.6 | 1.6 | 0.0 | 9.0 | 10.0 |
Loyalty to Friends | 7 | 8 | 1.8 | 0.8 | 1.0 | 2.0 | 6.0 |
flextable::save_as_docx(summary_table_v2, path = "summary_table_v2.docx",
width = 7.0, height = 7.0)
tinytex::install_tinytex(force = TRUE)
## tlmgr install colortbl environ makecell multirow pdflscape tabu threeparttable threeparttablex trimspaces ulem varwidth wrapfig
library(tinytex)
set_flextable_defaults(fonts_ignore=TRUE)
print(summary_table_v2, preview = "pdf")
## a flextable object.
## col_keys: ` `, `Unique (#)`, `Missing (%)`, `Mean`, `SD`, `Min`, `Median`, `Max`
## header has 1 row(s)
## body has 3 row(s)
## original dataset sample:
## Unique (#) Missing (%) Mean SD Min Median
## 1 Satisfaction with Democracy 12 3 6.5 2.0 0.0 7.0
## 2 Emotional Attachment to Country 12 73 8.6 1.6 0.0 9.0
## 3 Loyalty to Friends 7 8 1.8 0.8 1.0 2.0
## Max
## 1 10.0
## 2 10.0
## 3 6.0
Mission 11 Produce a visualization that compares the average for an outcome of interest to all other countries and arranges the values in order – this allows to quickly visually compare to get a sense as to where your country “stands” (i.e., does it seem to be an outlier, fairly “average”, or somewhere in between). You can also play around with computing averages by categorical variables or say for only Millennials to see where your country stands. This is still very exploratory but can help guide you towards what is worth exploring next.
setwd("~/SOC202 Documents/tutorial")
getwd()
## [1] "C:/Users/Adrien/Documents/SOC202 Documents/tutorial"
ess <- read_fst("All-ESS-Data.fst")
country_averages <- ess %>%
filter(!(trstsci %in% c(77, 88, 99))) %>%
group_by(cntry) %>%
summarize(avg_trstsci = mean(trstsci, na.rm = TRUE))
country_averages$cntry <- reorder(country_averages$cntry, country_averages$avg_trstsci)
ps <- ggplot(country_averages, aes(x = cntry, y = avg_trstsci, label = cntry)) +
geom_point(aes(color = avg_trstsci), size = 5) +
geom_text(nudge_y = 0.2, size = 3) +
labs(
x = "Country",
y = "Average Trust in Scientists",
title = "Average Trust in Scientists by Country"
) +
theme_minimal() +
theme(legend.position = "none",
axis.text.x = element_blank(),
axis.title.x = element_text(face="bold", size=14))
ps
## Warning: Removed 12 rows containing missing values (`geom_point()`).
## Warning: Removed 12 rows containing missing values (`geom_text()`).