# Required packages
packages <- c(
  "tidyverse", # data manipulation and ggplot2
  "gt",        # formatted tables
  "gapminder", # gapminder dataset
  "srvyr",     # dplyr-style syntax for survey-weighted summaries
  "ggridges"   # density ridge plots
)

# Install whatever is missing, then attach everything.
# system.file(package = ) returns "" for a package that is not installed; it
# is used instead of scanning installed.packages(), which reads metadata for
# every installed package just to answer a yes/no question.
is_installed <- vapply(
  packages,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1)
)
new_packages <- packages[!is_installed]
if (length(new_packages) > 0) {
  # Name the mirror explicitly: in a non-interactive run (e.g. knitting)
  # install.packages() fails when no CRAN mirror has been chosen yet.
  install.packages(new_packages, repos = "https://cloud.r-project.org/")
}

# lapply() returns (and auto-prints) the attached-package vector after each
# library() call; that list is a by-product, not a result of the analysis.
lapply(packages, library, character.only = TRUE)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'srvyr'
##
##
## The following object is masked from 'package:stats':
##
## filter
## [[1]]
## [1] "lubridate" "forcats" "stringr" "dplyr" "purrr" "readr"
## [7] "tidyr" "tibble" "ggplot2" "tidyverse" "stats" "graphics"
## [13] "grDevices" "utils" "datasets" "methods" "base"
##
## [[2]]
## [1] "gt" "lubridate" "forcats" "stringr" "dplyr" "purrr"
## [7] "readr" "tidyr" "tibble" "ggplot2" "tidyverse" "stats"
## [13] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[3]]
## [1] "gapminder" "gt" "lubridate" "forcats" "stringr" "dplyr"
## [7] "purrr" "readr" "tidyr" "tibble" "ggplot2" "tidyverse"
## [13] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [19] "base"
##
## [[4]]
## [1] "srvyr" "gapminder" "gt" "lubridate" "forcats" "stringr"
## [7] "dplyr" "purrr" "readr" "tidyr" "tibble" "ggplot2"
## [13] "tidyverse" "stats" "graphics" "grDevices" "utils" "datasets"
## [19] "methods" "base"
##
## [[5]]
## [1] "ggridges" "srvyr" "gapminder" "gt" "lubridate" "forcats"
## [7] "stringr" "dplyr" "purrr" "readr" "tidyr" "tibble"
## [13] "ggplot2" "tidyverse" "stats" "graphics" "grDevices" "utils"
## [19] "datasets" "methods" "base"
# Use the CRAN cloud mirror for any installs from this point on.
options(repos = c(CRAN = "https://cloud.r-project.org/"))

# Install fst only when it is missing. Reinstalling it unconditionally on
# every run tried to overwrite the existing copy and, on Windows, ended in
# "cannot remove prior installation of package 'fst'" / "Permission denied"
# warnings while replacing fst.dll, after which the old copy was restored.
if (!requireNamespace("fst", quietly = TRUE)) {
  install.packages("fst")
}
library(dplyr)
library(fst)

# Read the two country extracts from the working directory.
df <- read_fst("poland_data.fst")
df1 <- read_fst("sweden_data.fst")

# Tag every row with its country of origin, then stack both extracts.
df[["country"]] <- "Poland"
df1[["country"]] <- "Sweden"
df_combined <- bind_rows(df, df1)
# Keep rows with a usable answer on both items: no NA, and none of the
# excluded codes (7/8/9 for netusoft, 6/7/8/9 for wrclmch; presumably the
# survey's non-response codes - confirm against the codebook).
combined_na <- df_combined %>%
  filter(
    !is.na(netusoft),
    !is.na(wrclmch),
    !(netusoft %in% c(7, 8, 9)),
    !(wrclmch %in% c(6, 7, 8, 9))
  )

# Recode internet use (1-5) to text labels and store it as a factor; any code
# without a label is kept as its own text value. wrclmch is forced to numeric
# so it can be averaged.
combined_answers <- combined_na %>%
  mutate(
    netusoft = as.factor(
      case_when(
        netusoft == 1 ~ "Never",
        netusoft == 2 ~ "Rarely",
        netusoft == 3 ~ "Few times a month",
        netusoft == 4 ~ "Few times a week",
        netusoft == 5 ~ "Daily",
        TRUE ~ as.character(netusoft)
      )
    ),
    wrclmch = as.numeric(as.character(wrclmch))
  )

# Inspect the resulting column types
str(combined_answers$wrclmch)
## num [1:7347] 3 3 3 1 3 3 3 5 3 4 ...
str(combined_answers$netusoft)
## Factor w/ 5 levels "Daily","Few times a month",..: 2 1 1 4 4 3 4 1 1 2 ...
library(ggplot2)
library(dplyr)
library(tidyr)

# Ensure missing values are dropped
cleaned_data <- combined_answers %>%
  drop_na(netusoft, wrclmch)

# netusoft was turned into a factor from its text labels, so its levels are
# alphabetical ("Daily", "Few times a month", ...). Put the categories back
# into their ordinal order so the x-axis reads from least to most frequent.
netusoft_order <- c(
  "Never", "Rarely", "Few times a month", "Few times a week", "Daily"
)

# Mean climate concern per country and internet-use category, as dodged bars.
baseline_plot <- cleaned_data %>%
  group_by(country, netusoft) %>%
  summarise(
    avg_climate_concern = mean(wrclmch, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    # Known categories first; anything unexpected keeps its own level at the
    # end instead of silently becoming NA.
    netusoft = factor(
      netusoft,
      levels = union(netusoft_order, as.character(unique(netusoft)))
    )
  ) %>%
  ggplot(aes(x = netusoft, y = avg_climate_concern, fill = country)) +
  geom_col(position = "dodge") + # geom_col() == geom_bar(stat = "identity")
  labs(
    title = "Internet Usage vs. Climate Concern in Sweden & Poland",
    x = "Internet Usage Frequency",
    y = "Average Climate Concern"
  ) +
  theme_minimal() +
  # Rotate x-axis labels for readability
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Display the plot
print(baseline_plot)

library(ggplot2)
library(dplyr)
library(tidyr)

# Mean climate concern for every country/region pair, using only rows that
# have netusoft, wrclmch and region all present.
regional_data <- combined_answers %>%
  filter(!is.na(netusoft), !is.na(wrclmch), !is.na(region)) %>%
  group_by(country, region) %>%
  summarise(
    avg_climate_concern = mean(wrclmch, na.rm = TRUE),
    .groups = "drop"
  )

# Show the summary table
print(regional_data)
## # A tibble: 52 × 3
## country region avg_climate_concern
## <chr> <chr> <dbl>
## 1 Poland PL11 2.59
## 2 Poland PL12 2.82
## 3 Poland PL21 2.86
## 4 Poland PL22 3.13
## 5 Poland PL31 2.74
## 6 Poland PL32 2.47
## 7 Poland PL33 2.4
## 8 Poland PL34 2.78
## 9 Poland PL41 3.10
## 10 Poland PL42 3.08
## # ℹ 42 more rows

# One table per country
regional_poland <- filter(regional_data, country == "Poland")
regional_sweden <- filter(regional_data, country == "Sweden")
library(tidyr)
library(dplyr)

# Each input holds a single country, so widening on `country` yields one value
# column named after that country next to `region`.
regional_poland_wide <- pivot_wider(
  regional_poland,
  names_from = country,
  values_from = avg_climate_concern
)
regional_sweden_wide <- pivot_wider(
  regional_sweden,
  names_from = country,
  values_from = avg_climate_concern
)

# Show both wide tables
print(regional_poland_wide)
## # A tibble: 23 × 2
## region Poland
## <chr> <dbl>
## 1 PL11 2.59
## 2 PL12 2.82
## 3 PL21 2.86
## 4 PL22 3.13
## 5 PL31 2.74
## 6 PL32 2.47
## 7 PL33 2.4
## 8 PL34 2.78
## 9 PL41 3.10
## 10 PL42 3.08
## # ℹ 13 more rows
print(regional_sweden_wide)
## # A tibble: 29 × 2
## region Sweden
## <chr> <dbl>
## 1 SE11 3.23
## 2 SE110 2.87
## 3 SE12 3.07
## 4 SE121 2.87
## 5 SE122 2.74
## 6 SE123 2.85
## 7 SE124 2.95
## 8 SE125 2.55
## 9 SE21 3.04
## 10 SE211 2.69
## # ℹ 19 more rows
library(ggplot2)

# Horizontal bar chart of mean climate concern per region, with regions sorted
# by their mean. `regional_means` needs `region` and `avg_climate_concern`.
plot_regional_concern <- function(regional_means, country_name, bar_fill) {
  ggplot(
    regional_means,
    aes(x = reorder(region, avg_climate_concern), y = avg_climate_concern)
  ) +
    geom_col(fill = bar_fill, alpha = 0.7) +
    coord_flip() + # horizontal bars keep the region codes readable
    labs(
      title = paste0("Climate Concern Across Regions in ", country_name),
      x = "Region",
      y = "Average Climate Concern"
    ) +
    theme_minimal()
}

poland_plot <- plot_regional_concern(regional_poland, "Poland", "red")
sweden_plot <- plot_regional_concern(regional_sweden, "Sweden", "blue")

# Draw both charts
print(poland_plot)

print(sweden_plot)

# Education profile of every region: respondents per education band, and each
# band's share of the respondents in that region.
#
# eisced codes 55/77/88/99 are excluded, then the remaining codes are banded:
# <= 2 "Lower secondary or less", 3-4 "Upper secondary", everything else
# "Tertiary". NOTE(review): a code of 0, if present, also lands in the lowest
# band - confirm that is intended.
#
# Fix: `percentage` used to be computed after all grouping had been dropped,
# so the denominator was the whole two-country sample and the values measured
# region size rather than the education mix. It is now computed within each
# country/region, so the three bands of a region sum to 100.
# The unused `education` factor (discarded by summarise()) has been removed.
education_distribution <- combined_answers %>%
  drop_na(region, eisced) %>%
  filter(!(eisced %in% c(55, 77, 88, 99))) %>%
  mutate(
    eisced = case_when(
      eisced <= 2 ~ "Lower secondary or less",
      eisced <= 4 ~ "Upper secondary",
      TRUE ~ "Tertiary"
    )
  ) %>%
  group_by(country, region, eisced) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(country, region) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup()

# NOTE: any console output recorded in this file for this table (and for the
# tables derived from it) predates the within-region fix; re-run to refresh.
print(education_distribution)
## # A tibble: 156 × 5
## country region eisced count percentage
## <chr> <chr> <chr> <int> <dbl>
## 1 Poland PL11 Lower secondary or less 43 0.594
## 2 Poland PL11 Tertiary 38 0.525
## 3 Poland PL11 Upper secondary 39 0.539
## 4 Poland PL12 Lower secondary or less 71 0.981
## 5 Poland PL12 Tertiary 62 0.857
## 6 Poland PL12 Upper secondary 61 0.843
## 7 Poland PL21 Lower secondary or less 112 1.55
## 8 Poland PL21 Tertiary 108 1.49
## 9 Poland PL21 Upper secondary 99 1.37
## 10 Poland PL22 Lower secondary or less 133 1.84
## # ℹ 146 more rows
# Split the education table by country
education_poland <- filter(education_distribution, country == "Poland")
education_sweden <- filter(education_distribution, country == "Sweden")

# Peek at the first rows of each
head(education_poland, n = 6)
## # A tibble: 6 × 5
## country region eisced count percentage
## <chr> <chr> <chr> <int> <dbl>
## 1 Poland PL11 Lower secondary or less 43 0.594
## 2 Poland PL11 Tertiary 38 0.525
## 3 Poland PL11 Upper secondary 39 0.539
## 4 Poland PL12 Lower secondary or less 71 0.981
## 5 Poland PL12 Tertiary 62 0.857
## 6 Poland PL12 Upper secondary 61 0.843
head(education_sweden, n = 6)
## # A tibble: 6 × 5
## country region eisced count percentage
## <chr> <chr> <chr> <int> <dbl>
## 1 Sweden SE11 Lower secondary or less 78 1.08
## 2 Sweden SE11 Tertiary 319 4.41
## 3 Sweden SE11 Upper secondary 111 1.53
## 4 Sweden SE110 Lower secondary or less 28 0.387
## 5 Sweden SE110 Tertiary 184 2.54
## 6 Sweden SE110 Upper secondary 64 0.885
library(ggplot2)

# Stacked horizontal bars showing the education mix of each region.
# position = "fill" rescales every bar to 1, so only the within-region
# proportions of `percentage` matter.
plot_education_mix <- function(education_table, country_name) {
  ggplot(
    education_table,
    aes(x = region, y = percentage, fill = factor(eisced))
  ) +
    geom_col(position = "fill") +
    coord_flip() + # horizontal bars keep the region codes readable
    labs(
      title = paste0(
        "Education Level Distribution by Region in ", country_name
      ),
      x = "Region",
      y = "Proportion of Education Levels",
      fill = "Education Level"
    ) +
    theme_minimal()
}

education_poland_plot <- plot_education_mix(education_poland, "Poland")
# Draw the Poland chart
print(education_poland_plot)

education_sweden_plot <- plot_education_mix(education_sweden, "Sweden")
# Draw the Sweden chart
print(education_sweden_plot)

library(dplyr)

# Mean climate concern per region for one country.
# `regional_means` needs `country`, `region` and `avg_climate_concern`.
climate_by_region <- function(regional_means, target_country) {
  regional_means %>%
    filter(country == target_country) %>%
    group_by(region) %>%
    summarise(
      avg_climate_concern = mean(avg_climate_concern, na.rm = TRUE),
      .groups = "drop"
    )
}

# Share of one education band per region for one country.
# The result column is called `US_percentage` for every band (not only upper
# secondary) because the scatter plots further down read that name.
education_share_by_region <- function(distribution, target_country,
                                      target_level) {
  distribution %>%
    filter(country == target_country, eisced == target_level) %>%
    group_by(region) %>%
    summarise(
      US_percentage = mean(percentage, na.rm = TRUE),
      .groups = "drop"
    )
}

# Average climate concern per region
climate_avg_poland <- climate_by_region(regional_data, "Poland")
climate_avg_sweden <- climate_by_region(regional_data, "Sweden")

# Education shares per region (Poland): upper secondary, tertiary, lower
education_avg_poland <- education_share_by_region(
  education_distribution, "Poland", "Upper secondary"
)
education_avg_poland_tertiary <- education_share_by_region(
  education_distribution, "Poland", "Tertiary"
)
education_avg_poland_lower <- education_share_by_region(
  education_distribution, "Poland", "Lower secondary or less"
)

# Education shares per region (Sweden): upper secondary, tertiary, lower
education_avg_sweden <- education_share_by_region(
  education_distribution, "Sweden", "Upper secondary"
)
education_avg_sweden_tertiary <- education_share_by_region(
  education_distribution, "Sweden", "Tertiary"
)
# Fix: this used to filter on "sweden" (lowercase), which matches no rows and
# left the table - and everything joined to it - empty.
education_avg_sweden_lower <- education_share_by_region(
  education_distribution, "Sweden", "Lower secondary or less"
)

# Climate concern joined to the upper-secondary share
education_climate_poland <- climate_avg_poland %>%
  inner_join(education_avg_poland, by = "region")
education_climate_sweden <- climate_avg_sweden %>%
  inner_join(education_avg_sweden, by = "region")

# Climate concern joined to the tertiary share
education_climate_polandt <- climate_avg_poland %>%
  inner_join(education_avg_poland_tertiary, by = "region")
education_climate_swedent <- climate_avg_sweden %>%
  inner_join(education_avg_sweden_tertiary, by = "region")

# Climate concern joined to the lower-secondary-or-less share
# Fix: the Poland table used to start from climate_avg_sweden, so no region
# code could match and the join was empty.
education_climate_polandu <- climate_avg_poland %>%
  inner_join(education_avg_poland_lower, by = "region")
education_climate_swedenu <- climate_avg_sweden %>%
  inner_join(education_avg_sweden_lower, by = "region")

# View the merged data for Poland & Sweden
print(education_climate_poland)
## # A tibble: 23 × 3
## region avg_climate_concern US_percentage
## <chr> <dbl> <dbl>
## 1 PL11 2.59 0.539
## 2 PL12 2.82 0.843
## 3 PL21 2.86 1.37
## 4 PL22 3.13 1.95
## 5 PL31 2.74 0.304
## 6 PL32 2.47 0.456
## 7 PL33 2.4 0.166
## 8 PL34 2.78 0.221
## 9 PL41 3.10 1.29
## 10 PL42 3.08 0.415
## # ℹ 13 more rows
print(education_climate_sweden)
## # A tibble: 29 × 3
## region avg_climate_concern US_percentage
## <chr> <dbl> <dbl>
## 1 SE11 3.23 1.53
## 2 SE110 2.87 0.885
## 3 SE12 3.07 1.69
## 4 SE121 2.87 0.180
## 5 SE122 2.74 0.263
## 6 SE123 2.85 0.428
## 7 SE124 2.95 0.235
## 8 SE125 2.55 0.180
## 9 SE21 3.04 0.677
## 10 SE211 2.69 0.111
## # ℹ 19 more rows
library(ggplot2)

# Labelled scatter of an education share (x) against mean climate concern (y)
# for the Polish regions. `share_table` needs `region`, `US_percentage` and
# `avg_climate_concern`; only the x-axis label differs between the plots.
poland_scatter <- function(share_table, x_label) {
  ggplot(
    share_table,
    aes(x = US_percentage, y = avg_climate_concern, label = region)
  ) +
    geom_point(color = "red", size = 3) +
    geom_text(vjust = 1.5, hjust = 1, size = 3) +
    labs(
      title = "Education Level vs Climate Concern in Poland",
      x = x_label,
      y = "Average Climate Concern"
    ) +
    theme_minimal()
}

poland_plot <- poland_scatter(
  education_climate_poland, "Percentage of Upper Secondary Education"
)
poland_plot1 <- poland_scatter(
  education_climate_polandt, "Percentage of tertiary Education"
)
poland_plot2 <- poland_scatter(
  education_climate_polandu, "Percentage of Low Education"
)

# Only the upper-secondary and tertiary plots are drawn here.
print(poland_plot)

print(poland_plot1)

# Labelled scatter of an education share (x) against mean climate concern (y)
# for the Swedish regions. `share_table` needs `region`, `US_percentage` and
# `avg_climate_concern`; only the x-axis label differs between the plots.
sweden_scatter <- function(share_table, x_label) {
  ggplot(
    share_table,
    aes(x = US_percentage, y = avg_climate_concern, label = region)
  ) +
    geom_point(color = "blue", size = 3) +
    geom_text(vjust = 1.5, hjust = 1, size = 3) +
    labs(
      title = "Education Level vs Climate Concern in Sweden",
      x = x_label,
      y = "Average Climate Concern"
    ) +
    theme_minimal()
}

# Upper secondary share vs climate concern
# (axis label fixed: it used to read "Upper seonndary")
sweden_plot <- sweden_scatter(
  education_climate_sweden, "Percentage of Upper Secondary Education"
)
# Tertiary share vs climate concern
sweden_plot1 <- sweden_scatter(
  education_climate_swedent, "Percentage of Tertiary Education"
)
# Lower-secondary-or-less share vs climate concern
sweden_plot2 <- sweden_scatter(
  education_climate_swedenu, "Percentage of Low Education"
)

# Print plots for Sweden (only the first two are drawn here)
print(sweden_plot)

print(sweden_plot1)
