# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS", "kableExtra", "flextable", "equatiomatic") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: grid
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Loading required package: survival
##
##
## Attaching package: 'survey'
##
##
## The following object is masked from 'package:graphics':
##
## dotchart
##
##
##
## Attaching package: 'MASS'
##
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
##
## Attaching package: 'flextable'
##
##
## The following objects are masked from 'package:kableExtra':
##
## as_image, footnote
##
##
## The following object is masked from 'package:purrr':
##
## compose
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "infer" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "fst" "infer" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [6] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [11] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [16] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "effects" "carData" "modelsummary" "fst" "infer"
## [6] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [11] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [16] "stats" "graphics" "grDevices" "utils" "datasets"
## [21] "methods" "base"
##
## [[6]]
## [1] "survey" "survival" "Matrix" "grid" "effects"
## [6] "carData" "modelsummary" "fst" "infer" "lubridate"
## [11] "forcats" "stringr" "dplyr" "purrr" "readr"
## [16] "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [21] "graphics" "grDevices" "utils" "datasets" "methods"
## [26] "base"
##
## [[7]]
## [1] "MASS" "survey" "survival" "Matrix" "grid"
## [6] "effects" "carData" "modelsummary" "fst" "infer"
## [11] "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [16] "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [21] "stats" "graphics" "grDevices" "utils" "datasets"
## [26] "methods" "base"
##
## [[8]]
## [1] "kableExtra" "MASS" "survey" "survival" "Matrix"
## [6] "grid" "effects" "carData" "modelsummary" "fst"
## [11] "infer" "lubridate" "forcats" "stringr" "dplyr"
## [16] "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [21] "tidyverse" "stats" "graphics" "grDevices" "utils"
## [26] "datasets" "methods" "base"
##
## [[9]]
## [1] "flextable" "kableExtra" "MASS" "survey" "survival"
## [6] "Matrix" "grid" "effects" "carData" "modelsummary"
## [11] "fst" "infer" "lubridate" "forcats" "stringr"
## [16] "dplyr" "purrr" "readr" "tidyr" "tibble"
## [21] "ggplot2" "tidyverse" "stats" "graphics" "grDevices"
## [26] "utils" "datasets" "methods" "base"
##
## [[10]]
## [1] "equatiomatic" "flextable" "kableExtra" "MASS" "survey"
## [6] "survival" "Matrix" "grid" "effects" "carData"
## [11] "modelsummary" "fst" "infer" "lubridate" "forcats"
## [16] "stringr" "dplyr" "purrr" "readr" "tidyr"
## [21] "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [26] "grDevices" "utils" "datasets" "methods" "base"
tinytex::install_tinytex(force = TRUE)
## The directory /usr/local/bin is not writable. I recommend that you make it writable. See https://github.com/rstudio/tinytex/issues/24 for more info.
## tlmgr install multirow
ess <- read_fst("~/Desktop/SOC202 Final Project/All-ESS-Data.fst")
Variable Info:
how happy are you (outcome): https://ess.sikt.no/en/datafile/b2b0bf39-176b-4eca-8d26-3c05ea83d2cb/266?tab=1&elements=[%22a5c95af9-6037-4e52-9156-2ceac708eb3f/4%22]
highest level of education (main predictor): https://ess.sikt.no/en/datafile/2dc0634d-eb82-47c1-9e7a-83c9ec2e9130/6?tab=1&elements=[%22649fc6a7-858f-4743-955d-20a7568f3afb/1%22] https://ess.sikt.no/en/datafile/f37d014a-6958-42d4-b03b-17c29e481d3d/256?tab=1&elements=[%22a7895188-56aa-46c1-93b6-2af6d966b2b5/8%22]
satisfaction with democracy (secondary predictor): https://ess.sikt.no/en/datafile/a93fed5b-3858-4e86-bdae-dfcf5bbc9bf9/23?tab=1&elements=[%22eb1d3374-0cd8-4324-98ad-a36141dc9587/2%22]
#filtering to our country of interest
finland_data <- ess %>%
filter(cntry == "FI") %>%
mutate(
#cleaning our outcome happy
happy = ifelse(happy %in% c(77, 88, 99), NA, happy),
#cleaning our main predictor education level
edulvla = edulvla %>%
na_if(55) %>%
na_if(77) %>%
na_if(88) %>%
na_if(99),
edulvlb = edulvlb %>%
na_if(5555) %>%
na_if(7777) %>%
na_if(8888) %>%
na_if(9999),
#changed BA and No BA to a numeric for later coding
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ 1,
essround >= 5 & edulvlb > 600 ~ 1,
TRUE ~ 0
),
#cleaning our secondary predictor satisfaction with democracy
stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem)
)
# not a bad practice to create a duplicate in case you need to backtrack or use for different purposes
df <- finland_data
#making sure all variables are properly cleaned
finland_data <- finland_data %>% filter(!is.na(happy))
finland_data <- finland_data %>% filter(!is.na(educ.ba))
finland_data <- finland_data %>% filter(!is.na(stfdem))
table(finland_data$happy)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 18 24 79 158 218 595 759 2577 6716 6036 1831
table(finland_data$educ.ba)
##
## 0 1
## 13591 5420
table(finland_data$stfdem)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 159 202 419 853 1261 2224 2495 4512 4675 1864 347
#Graphs for outcome over time
#putting our data year range from 2002-2020
df$year <- NA
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
for(i in 1:10){
df$year[df$essround == i] <- replacements[i]
}
happy_avg <- df %>%
group_by(year) %>%
summarize(happy_avg = mean(happy, na.rm = TRUE))
plot_zoom_happy <- ggplot(happy_avg, aes(x = year, y = happy_avg)) +
geom_point(aes(size = happy_avg, color = happy_avg), alpha = 0.7) +
geom_line(aes(group = 1), color = "blue", linetype = "dashed") +
labs(title = "Happy Average by Year (Finland)",
x = "Survey Year",
y = "Happy Average") +
theme_minimal() +
theme(legend.position = "none")
print(plot_zoom_happy)
plot_full_scale_happy <- ggplot(happy_avg, aes(x = year, y = happy_avg)) +
geom_point(aes(size = happy_avg, color = happy_avg), alpha = 0.7) +
geom_line(aes(group = 1), color = "blue", linetype = "dashed") +
labs(title = "Happy Average by Year (Finland)",
x = "Survey Year",
y = "Happy Average") +
theme_minimal() +
theme(legend.position = "none") +
scale_y_continuous(limits = c(0, 10))
print(plot_full_scale_happy)
The two graphs show that the Finnish are consistently highly happy over
the near two decade time span.
#need to delete some data to prevent vector memory exhauseted error
rm(finland_data)
rm(plot_full_scale_happy)
rm(plot_zoom_happy)
rm(happy_avg)
rm(df)
#Finland average happiness score compared to other countries
ess$year <- NA
replacements <- c(2002, 2004, 2006, 2008, 2010, 2012, 2014, 2016, 2018, 2020)
for(i in 1:10){
ess$year[ess$essround == i] <- replacements[i]
}
ess <- ess %>%
mutate(
#cleaning our outcome happy
happy = ifelse(happy %in% c(77, 88, 99), NA, happy),
#cleaning our main predictor education level
edulvla = case_when(
essround < 5 & edulvla == 55 ~ NA_real_,
TRUE ~ edulvla
),
edulvlb = case_when(
essround >= 5 & edulvlb == 5555 ~ NA_real_,
TRUE ~ edulvlb
),
educ_level = case_when(
essround < 5 & edulvla == 5 ~ "BA",
essround >= 5 & edulvlb > 600 ~ "BA",
TRUE ~ "No BA"
),
#cleaning our secondary predictor satisfaction with democracy
stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem)
)
ess_cleaned <- ess %>%
mutate(
happy = happy,
year = as.factor(year) # Ensure year is a factor
) %>%
filter(!is.na(happy))
# Calculating yearly averages for each country
yearly_happy_avg <- ess_cleaned %>%
group_by(cntry, year) %>%
summarize(avg_happy = mean(happy), .groups = 'drop')
# Subset for specific countries and overall ESS baseline
specific_countries_happy <- yearly_happy_avg %>%
filter(cntry %in% c("HU", "FI", "ES", "DE", "SE"))
ess_baseline_happy <- ess_cleaned %>%
group_by(year) %>%
summarize(avg_happy = mean(happy), .groups = 'drop') %>%
mutate(cntry = "ESS Baseline")
# Combining specific countries with ESS baseline
combined_data_happy <- rbind(specific_countries_happy, ess_baseline_happy)
# Plotting the spaghetti plot for happy
ggplot(combined_data_happy, aes(x = year, y = avg_happy, group = cntry, color = cntry)) +
geom_line() +
scale_color_manual(values = c("HU" = "darkgreen", "FI" = "red", "ES" = "yellow", "DE" = "orange", "SE" = "blue", "ESS Baseline" = "black")) +
labs(title = "Average Happy Score by Year for Selected Countries and ESS Baseline",
x = "Year",
y = "Happy Score") +
theme_minimal() +
theme(legend.position = "right")
Here we can note that Finland is noticeably higher than the ESS
baseline. It also shows that Finland has the highest average score of
all countries regardless of the year and is the most consistent as
well.
#need to delete some data to prevent vector memory exhauseted error
rm(combined_data_happy)
rm(ess_baseline_happy)
rm(ess_cleaned)
rm(specific_countries_happy)
rm(yearly_happy_avg)
#Adding back previosuly deleted data from vector memory exhausted error
#filtering to our country of interest
finland_data <- ess %>%
filter(cntry == "FI") %>%
mutate(
#cleaning our outcome happy
happy = ifelse(happy %in% c(77, 88, 99), NA, happy),
#cleaning our main predictor education level
edulvla = edulvla %>%
na_if(55) %>%
na_if(77) %>%
na_if(88) %>%
na_if(99),
edulvlb = edulvlb %>%
na_if(5555) %>%
na_if(7777) %>%
na_if(8888) %>%
na_if(9999),
#changed BA and No BA to a numeric for later coding
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ 1,
essround >= 5 & edulvlb > 600 ~ 1,
TRUE ~ 0
),
#cleaning our secondary predictor satisfaction with democracy
stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem)
)
# not a bad practice to create a duplicate in case you need to backtrack or use for different purposes
df <- finland_data
#making sure all variables are properly cleaned
finland_data <- finland_data %>% filter(!is.na(happy))
finland_data <- finland_data %>% filter(!is.na(educ.ba))
finland_data <- finland_data %>% filter(!is.na(stfdem))
table(finland_data$happy)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 18 24 79 158 218 595 759 2577 6716 6036 1831
table(finland_data$educ.ba)
##
## 0 1
## 13591 5420
table(finland_data$stfdem)
##
## 0 1 2 3 4 5 6 7 8 9 10
## 159 202 419 853 1261 2224 2495 4512 4675 1864 347
rm(ess)
#Creating a data summary table
d <- df %>%
# Convert factors/characters to numeric
mutate(
happy = as.numeric(happy),
stfdem = as.numeric(stfdem),
educ_ba_num = as.numeric(educ.ba)
)
table1 <- datasummary_skim(d %>% dplyr::select(happy, stfdem, educ_ba_num), output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
## "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
## ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
## compiled to PDF (via kableExtra) or HTML (via kableExtra or gt). Use
## `histogram=FALSE` to silence this warning.
table1
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
happy | 12 | 0 | 8.1 | 1.4 | 0.0 | 8.0 | 10.0 |
stfdem | 12 | 3 | 6.5 | 2.0 | 0.0 | 7.0 | 10.0 |
educ_ba_num | 2 | 0 | 0.3 | 0.4 | 0.0 | 0.0 | 1.0 |
#relabeling and titling our data summary table
d <- d %>%
rename(
`Happiness` = happy,
`Satisfaction with Democracy` = stfdem,
`Educational Attainment` = educ_ba_num
)
table1_v2 <- datasummary_skim(d %>% dplyr::select(`Happiness`,`Satisfaction with Democracy`, `Educational Attainment`), title = "Descriptive Statistics of Variables", output = "flextable")
## Warning: The histogram argument is only supported for (a) output types "default",
## "html", "kableExtra", or "gt"; (b) writing to file paths with extensions
## ".html", ".jpg", or ".png"; and (c) Rmarkdown, knitr or Quarto documents
## compiled to PDF (via kableExtra) or HTML (via kableExtra or gt). Use
## `histogram=FALSE` to silence this warning.
table1_v2
| Unique (#) | Missing (%) | Mean | SD | Min | Median | Max |
|---|---|---|---|---|---|---|---|
Happiness | 12 | 0 | 8.1 | 1.4 | 0.0 | 8.0 | 10.0 |
Satisfaction with Democracy | 12 | 3 | 6.5 | 2.0 | 0.0 | 7.0 | 10.0 |
Educational Attainment | 2 | 0 | 0.3 | 0.4 | 0.0 | 0.0 | 1.0 |
#saving table 1 as a pdf
flextable::save_as_docx(table1_v2, path = "summary_table1_v2.docx", # change name to whatever you would like
width = 7.0, height = 7.0) # play around with dimensions
set_flextable_defaults(fonts_ignore=TRUE)
print(table1_v2, preview = "pdf")
## a flextable object.
## col_keys: ` `, `Unique (#)`, `Missing (%)`, `Mean`, `SD`, `Min`, `Median`, `Max`
## header has 1 row(s)
## body has 3 row(s)
## original dataset sample:
## Unique (#) Missing (%) Mean SD Min Median Max
## 1 Happiness 12 0 8.1 1.4 0.0 8.0 10.0
## 2 Satisfaction with Democracy 12 3 6.5 2.0 0.0 7.0 10.0
## 3 Educational Attainment 2 0 0.3 0.4 0.0 0.0 1.0
#Hypothesis Testing with Infer
#subsetting to our country of interest
df <- finland_data %>%
filter(cntry == "FI") %>%
mutate(
#cleaning our outcome happy
happy = ifelse(happy %in% c(77, 88, 99), NA, happy),
#cleaning our main predictor education level
edulvla = edulvla %>%
na_if(55) %>%
na_if(77) %>%
na_if(88) %>%
na_if(99),
edulvlb = edulvlb %>%
na_if(5555) %>%
na_if(7777) %>%
na_if(8888) %>%
na_if(9999),
#changing educ.ba back to a categorical variable in order to run t test
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ 'BA',
essround >= 5 & edulvlb > 600 ~ 'BA',
TRUE ~ 'No BA'
),
#cleaning our secondary filter satisfaction with democracy
stfdem = ifelse(stfdem %in% c(77, 88, 99), NA, stfdem)
)
test_stat <- df %>%
specify(explanatory = educ.ba, # change variable name for explanatory variable
response = happy) %>% # change variable name for outcome of interest
hypothesize(null = "independence") %>%
calculate(stat = "t") # replace in between quotation marks appropriate test statistic
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "BA" - "No BA", or divided in the order "BA" / "No BA" for ratio-based
## statistics. To specify this order yourself, supply `order = c("BA", "No BA")`
## to the calculate() function.
print(test_stat$stat)
## t
## 11.45481
A t-statistic of 11.45481 is quite large, indicating a significant difference in happiness between those with a BA and those without a BA. It suggests that the mean happiness is different for the two groups.
null_dist <- df %>%
specify(explanatory = educ.ba,
response = happy) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>% # only line we are adding, use reps = 1000 as standard
calculate(stat = "t")
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "BA" - "No BA", or divided in the order "BA" / "No BA" for ratio-based
## statistics. To specify this order yourself, supply `order = c("BA", "No BA")`
## to the calculate() function.
null_dist
## Response: happy (numeric)
## Explanatory: educ.ba (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 0.143
## 2 2 0.537
## 3 3 -0.845
## 4 4 0.402
## 5 5 -2.13
## 6 6 -0.0297
## 7 7 -0.366
## 8 8 0.371
## 9 9 -0.420
## 10 10 -0.624
## # ℹ 990 more rows
p_val <- null_dist %>%
get_pvalue(obs_stat = test_stat, direction = "two-sided")
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
p_val
## # A tibble: 1 × 1
## p_value
## <dbl>
## 1 0
A p-value of 0 means there is strong evidence to reject the null hypothesis.
null_dist %>%
visualize() +
shade_p_value(obs_stat = test_stat, direction = "two-sided")
null_dist
## Response: happy (numeric)
## Explanatory: educ.ba (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 0.143
## 2 2 0.537
## 3 3 -0.845
## 4 4 0.402
## 5 5 -2.13
## 6 6 -0.0297
## 7 7 -0.366
## 8 8 0.371
## 9 9 -0.420
## 10 10 -0.624
## # ℹ 990 more rows
Visualizes the null distribution alongside the observsed test statistic (the red line), indicating here clear evidence, under assumptions of frequentist hypothesis testing, to reject the null.
conf_int <- null_dist %>%
get_confidence_interval(level = 0.95, type = "percentile")
null_dist %>%
visualize() +
shade_p_value(obs_stat = test_stat, direction = "two-sided") +
shade_confidence_interval(endpoints = conf_int)+
labs(title = "Figure 1. Simulation-Based Null Distribution",
x = "T statistic",
y = "Count")
null_dist
## Response: happy (numeric)
## Explanatory: educ.ba (factor)
## Null Hypothesis: independence
## # A tibble: 1,000 × 2
## replicate stat
## <int> <dbl>
## 1 1 0.143
## 2 2 0.537
## 3 3 -0.845
## 4 4 0.402
## 5 5 -2.13
## 6 6 -0.0297
## 7 7 -0.366
## 8 8 0.371
## 9 9 -0.420
## 10 10 -0.624
## # ℹ 990 more rows
With a 95% confidence interval we can see strong evidence to reject the null hypothesis as the test statistic falls way outside the simulated T Null Distribution.
#Regression Modelling
We need to use modelsummary here and specify three models (single, multivariate, interaction)
df$weight <- df$dweight * df$pweight
model1 <- lm(happy ~ educ.ba, data = df, weights = weight)
model2 <- lm(happy ~ educ.ba + stfdem, data = df, weights = weight)
model3 <- lm(happy ~ educ.ba + stfdem*educ.ba, data = df, weights = weight)
modelsummary(
list(model1, model2, model3),
fmt = 1,
estimate = c( "{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}",
"{estimate} ({std.error}){stars}"),
statistic = NULL,
coef_omit = "Intercept")
| Â (1) | Â Â (2) | Â Â (3) | |
|---|---|---|---|
| educ.baNo BA | −0.2 (0.0)*** | −0.1 (0.0)*** | −0.5 (0.1)*** |
| stfdem | 0.2 (0.0)*** | 0.1 (0.0)*** | |
| educ.baNo BA × stfdem | 0.0 (0.0)*** | ||
| Num.Obs. | 19011 | 19011 | 19011 |
| R2 | 0.006 | 0.055 | 0.055 |
| R2 Adj. | 0.006 | 0.055 | 0.055 |
| AIC | 66550.2 | 65601.6 | 65587.8 |
| BIC | 66573.7 | 65633.0 | 65627.1 |
| Log.Lik. | −33272.089 | −32796.802 | −32788.901 |
| RMSE | 1.39 | 1.35 | 1.35 |
#Creating a boxplot
df <- finland_data %>%
mutate(
happy = ifelse(happy %in% c(77, 88, 99), NA, happy),
#cleaning our main predictor education level
edulvla = edulvla %>%
na_if(55) %>%
na_if(77) %>%
na_if(88) %>%
na_if(99),
edulvlb = edulvlb %>%
na_if(5555) %>%
na_if(7777) %>%
na_if(8888) %>%
na_if(9999),
educ.ba = case_when(
essround < 5 & edulvla == 5 ~ 'BA',
essround >= 5 & edulvlb > 600 ~ 'BA',
TRUE ~ 'No BA'
)
)
figure2plot <- ggplot(df, aes(x = reorder(educ.ba, -happy, FUN=median), y = happy, fill = educ.ba)) +
geom_boxplot() +
theme_minimal() +
theme(legend.position = "none") +
labs(title = "Figure 2. Boxplot Comparison for Happiness and Educational Attainment (Finland)",
x = "Educational Attainment",
y = " Happiness Scale (0-10)")
figure2plot
ggsave(filename = "figure2plot.pdf", plot = figure2plot, device = "pdf", width = 8, height = 6)