Initial Analysis

  # Required packages 
packages <- c( 
"tidyverse",    
# For data manipulation and ggplot2 
"gt",           
# For formatted tables 
"gapminder",    
# For gapminder dataset 
"srvyr",          
# For reading ESS data 
"ggridges"      
# For density ridge plots 
) 
# Install and load packages 
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])] 
if(length(new_packages)) install.packages(new_packages) 
lapply(packages, library, character.only = TRUE)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## 
## Attaching package: 'srvyr'
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter

## [[1]]
##  [1] "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"     "readr"    
##  [7] "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"     "graphics" 
## [13] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[2]]
##  [1] "gt"        "lubridate" "forcats"   "stringr"   "dplyr"     "purrr"    
##  [7] "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse" "stats"    
## [13] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "gapminder" "gt"        "lubridate" "forcats"   "stringr"   "dplyr"    
##  [7] "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"   "tidyverse"
## [13] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [19] "base"     
## 
## [[4]]
##  [1] "srvyr"     "gapminder" "gt"        "lubridate" "forcats"   "stringr"  
##  [7] "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"    "ggplot2"  
## [13] "tidyverse" "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [19] "methods"   "base"     
## 
## [[5]]
##  [1] "ggridges"  "srvyr"     "gapminder" "gt"        "lubridate" "forcats"  
##  [7] "stringr"   "dplyr"     "purrr"     "readr"     "tidyr"     "tibble"   
## [13] "ggplot2"   "tidyverse" "stats"     "graphics"  "grDevices" "utils"    
## [19] "datasets"  "methods"   "base"

options(repos = c(CRAN = "https://cloud.r-project.org/"))
install.packages("fst")

## Installing package into 'C:/Users/Madeline/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'fst' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'fst'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\Madeline\AppData\Local\R\win-library\4.4\00LOCK\fst\libs\x64\fst.dll
## to C:\Users\Madeline\AppData\Local\R\win-library\4.4\fst\libs\x64\fst.dll:
## Permission denied

## Warning: restored 'fst'

## 
## The downloaded binary packages are in
##  C:\Users\Madeline\AppData\Local\Temp\Rtmpc3NZyu\downloaded_packages

library(dplyr)
library(fst)
df <- read_fst("poland_data.fst")
df1 <- read_fst("sweden_data.fst")

library(dplyr)
df$country <- "Poland"
df1$country <- "Sweden"

df_combined <- bind_rows(df, df1)

combined_na <- df_combined %>%
    filter(!is.na(netusoft) , !is.na(wrclmch)) %>%
    filter(!(netusoft %in% c(7, 8, 9))) %>%
    filter(!(wrclmch %in% c(6, 7, 8, 9)))

combined_answers <- combined_na %>%
 
   mutate(netusoft = case_when(
    netusoft == 1 ~ "Never",
    netusoft == 2 ~ "Rarely",
    netusoft == 3 ~ "Few times a month",
    netusoft == 4 ~ "Few times a week",
    netusoft == 5 ~ "Daily",
    TRUE ~ as.character(netusoft)))

# Check for missing or unexpected values
# Convert to numeric if they are meant to be numbers
combined_answers$wrclmch <- as.numeric(as.character(combined_answers$wrclmch))
combined_answers$netusoft <- as.factor(combined_answers$netusoft)  # Keep netusoft as categorical

# Verify the conversion
str(combined_answers$wrclmch)

##  num [1:7347] 3 3 3 1 3 3 3 5 3 4 ...

str(combined_answers$netusoft)

##  Factor w/ 5 levels "Daily","Few times a month",..: 2 1 1 4 4 3 4 1 1 2 ...

library(ggplot2)
library(dplyr)
library(tidyr)

# Ensure missing values are dropped
cleaned_data <- combined_answers %>%
  drop_na(netusoft, wrclmch)  # Remove any NA values

# Create the baseline plot
baseline_plot <- cleaned_data %>%
  group_by(country, netusoft) %>%
  summarise(avg_climate_concern = mean(wrclmch, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = netusoft, y = avg_climate_concern, fill = country)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Internet Usage vs. Climate Concern in Sweden & Poland",
    x = "Internet Usage Frequency",
    y = "Average Climate Concern"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))  # Rotate x-axis labels for readability

# Display the plot
print(baseline_plot)

library(ggplot2)
library(dplyr)
library(tidyr)

# Summarize average climate concern by region and country
regional_data <- combined_answers %>%
  drop_na(netusoft, wrclmch, region) %>%
  group_by(country, region) %>%
  summarise(avg_climate_concern = mean(wrclmch, na.rm = TRUE), .groups = "drop")

# View summary
print(regional_data)

## # A tibble: 52 × 3
##    country region avg_climate_concern
##    <chr>   <chr>                <dbl>
##  1 Poland  PL11                  2.59
##  2 Poland  PL12                  2.82
##  3 Poland  PL21                  2.86
##  4 Poland  PL22                  3.13
##  5 Poland  PL31                  2.74
##  6 Poland  PL32                  2.47
##  7 Poland  PL33                  2.4 
##  8 Poland  PL34                  2.78
##  9 Poland  PL41                  3.10
## 10 Poland  PL42                  3.08
## # ℹ 42 more rows

# Filter data separately for Poland and Sweden
regional_poland <- regional_data %>% filter(country == "Poland")
regional_sweden <- regional_data %>% filter(country == "Sweden")
library(tidyr)
library(dplyr)

# Pivot separately for Poland
regional_poland_wide <- regional_poland %>%
  pivot_wider(names_from = country, values_from = avg_climate_concern)

# Pivot separately for Sweden
regional_sweden_wide <- regional_sweden %>%
  pivot_wider(names_from = country, values_from = avg_climate_concern)

# Print results
print(regional_poland_wide)

## # A tibble: 23 × 2
##    region Poland
##    <chr>   <dbl>
##  1 PL11     2.59
##  2 PL12     2.82
##  3 PL21     2.86
##  4 PL22     3.13
##  5 PL31     2.74
##  6 PL32     2.47
##  7 PL33     2.4 
##  8 PL34     2.78
##  9 PL41     3.10
## 10 PL42     3.08
## # ℹ 13 more rows

print(regional_sweden_wide)

## # A tibble: 29 × 2
##    region Sweden
##    <chr>   <dbl>
##  1 SE11     3.23
##  2 SE110    2.87
##  3 SE12     3.07
##  4 SE121    2.87
##  5 SE122    2.74
##  6 SE123    2.85
##  7 SE124    2.95
##  8 SE125    2.55
##  9 SE21     3.04
## 10 SE211    2.69
## # ℹ 19 more rows

library(ggplot2)

# Plot for Poland
poland_plot <- ggplot(regional_poland, aes(x = reorder(region, avg_climate_concern), y = avg_climate_concern)) +
  geom_bar(stat = "identity", fill = "red", alpha = 0.7) +
  coord_flip() +  # Rotate for readability
  labs(
    title = "Climate Concern Across Regions in Poland",
    x = "Region",
    y = "Average Climate Concern"
  ) +
  theme_minimal()

# Plot for Sweden
sweden_plot <- ggplot(regional_sweden, aes(x = reorder(region, avg_climate_concern), y = avg_climate_concern)) +
  geom_bar(stat = "identity", fill = "blue", alpha = 0.7) +
  coord_flip() +  # Rotate for readability
  labs(
    title = "Climate Concern Across Regions in Sweden",
    x = "Region",
    y = "Average Climate Concern"
  ) +
  theme_minimal()

# Display the plots
print(poland_plot)

print(sweden_plot)

education_distribution <- combined_answers %>%
  drop_na(region, eisced) %>% 
  filter(!(eisced %in% c(55,77,88,99)))%>%
  # Remove missing values
  mutate(eisced = case_when(
      eisced <= 2 ~ "Lower secondary or less",
      eisced <= 4 ~ "Upper secondary",
      TRUE ~ "Tertiary"
    ),
    education = factor(eisced, 
                      levels = c("Lower secondary or less",
                               "Upper secondary", 
                               "Tertiary")))%>%
  group_by(country, region, eisced) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(percentage = count / sum(count) * 100)
print(education_distribution)

## # A tibble: 156 × 5
##    country region eisced                  count percentage
##    <chr>   <chr>  <chr>                   <int>      <dbl>
##  1 Poland  PL11   Lower secondary or less    43      0.594
##  2 Poland  PL11   Tertiary                   38      0.525
##  3 Poland  PL11   Upper secondary            39      0.539
##  4 Poland  PL12   Lower secondary or less    71      0.981
##  5 Poland  PL12   Tertiary                   62      0.857
##  6 Poland  PL12   Upper secondary            61      0.843
##  7 Poland  PL21   Lower secondary or less   112      1.55 
##  8 Poland  PL21   Tertiary                  108      1.49 
##  9 Poland  PL21   Upper secondary            99      1.37 
## 10 Poland  PL22   Lower secondary or less   133      1.84 
## # ℹ 146 more rows

# Filter for Poland
education_poland <- education_distribution %>%
  filter(country == "Poland")

# Filter for Sweden
education_sweden <- education_distribution %>%
  filter(country == "Sweden")

# View summary
head(education_poland)

## # A tibble: 6 × 5
##   country region eisced                  count percentage
##   <chr>   <chr>  <chr>                   <int>      <dbl>
## 1 Poland  PL11   Lower secondary or less    43      0.594
## 2 Poland  PL11   Tertiary                   38      0.525
## 3 Poland  PL11   Upper secondary            39      0.539
## 4 Poland  PL12   Lower secondary or less    71      0.981
## 5 Poland  PL12   Tertiary                   62      0.857
## 6 Poland  PL12   Upper secondary            61      0.843

head(education_sweden)

## # A tibble: 6 × 5
##   country region eisced                  count percentage
##   <chr>   <chr>  <chr>                   <int>      <dbl>
## 1 Sweden  SE11   Lower secondary or less    78      1.08 
## 2 Sweden  SE11   Tertiary                  319      4.41 
## 3 Sweden  SE11   Upper secondary           111      1.53 
## 4 Sweden  SE110  Lower secondary or less    28      0.387
## 5 Sweden  SE110  Tertiary                  184      2.54 
## 6 Sweden  SE110  Upper secondary            64      0.885

library(ggplot2)

education_poland_plot <- ggplot(education_poland, aes(x = region, y = percentage, fill = factor(eisced))) +
  geom_bar(stat = "identity", position = "fill") +
  coord_flip() +  # Rotate for readability
  labs(
    title = "Education Level Distribution by Region in Poland",
    x = "Region",
    y = "Proportion of Education Levels",
    fill = "Education Level"
  ) +
  theme_minimal()

# Display the plot
print(education_poland_plot)

education_sweden_plot <- ggplot(education_sweden, aes(x = region, y = percentage, fill = factor(eisced))) +
  geom_bar(stat = "identity", position = "fill") +
  coord_flip() +  # Rotate for readability
  labs(
    title = "Education Level Distribution by Region in Sweden",
    x = "Region",
    y = "Proportion of Education Levels",
    fill = "Education Level"
  ) +
  theme_minimal()

# Display the plot
print(education_sweden_plot)

library(dplyr)

# Compute average climate concern per region (Poland)
climate_avg_poland <- regional_data %>%
  filter(country == "Poland") %>%
  group_by(region) %>%
  summarise(avg_climate_concern = mean(avg_climate_concern, na.rm = TRUE), .groups = "drop")

# Compute average climate concern per region (Sweden)
climate_avg_sweden <- regional_data %>%
  filter(country == "Sweden") %>%
  group_by(region) %>%
  summarise(avg_climate_concern = mean(avg_climate_concern, na.rm = TRUE), .groups = "drop")

# Compute percentage of tertiary education per region (Poland)
education_avg_poland <- education_distribution %>%
  filter(country == "Poland", eisced == "Upper secondary") %>%
  group_by(region) %>%
  summarise(US_percentage = mean(percentage, na.rm = TRUE), .groups = "drop")

education_avg_poland_tertiary <- education_distribution %>%
  filter(country == "Poland", eisced == "Tertiary") %>%
  group_by(region) %>%
  summarise(US_percentage = mean(percentage, na.rm = TRUE), .groups = "drop")

education_avg_poland_lower <- education_distribution %>%
  filter(country == "Poland", eisced == "Lower secondary or less") %>%
  group_by(region) %>%
  summarise(US_percentage = mean(percentage, na.rm = TRUE), .groups = "drop")

# Compute percentage of tertiary education per region (Sweden)
education_avg_sweden <- education_distribution %>%
  filter(country == "Sweden", eisced == "Upper secondary") %>%
  group_by(region) %>%
  summarise(US_percentage = mean(percentage, na.rm = TRUE), .groups = "drop")

education_avg_sweden_tertiary <- education_distribution %>%
  filter(country == "Sweden", eisced == "Tertiary") %>%
  group_by(region) %>%
  summarise(US_percentage = mean(percentage, na.rm = TRUE), .groups = "drop")

education_avg_sweden_lower <- education_distribution %>%
  filter(country == "sweden", eisced == "Lower secondary or less") %>%
  group_by(region) %>%
  summarise(US_percentage = mean(percentage, na.rm = TRUE), .groups = "drop")

# Merge both datasets for Poland
education_climate_poland <- climate_avg_poland %>%
  inner_join(education_avg_poland, by = "region")

# Merge both datasets for Sweden
education_climate_sweden <- climate_avg_sweden %>%
  inner_join(education_avg_sweden, by = "region")
education_climate_poland <- climate_avg_poland %>%
  inner_join(education_avg_poland, by = "region")

# Merge both datasets for Sweden
education_climate_swedent <- climate_avg_sweden %>%
  inner_join(education_avg_sweden_tertiary, by = "region")
education_climate_polandt <- climate_avg_poland %>%
  inner_join(education_avg_poland_tertiary, by = "region")

# Merge both datasets for Sweden
education_climate_swedenu <- climate_avg_sweden %>%
  inner_join(education_avg_sweden_lower, by = "region")
education_climate_polandu <- climate_avg_sweden %>%
  inner_join(education_avg_poland_lower, by = "region")

# View the merged data for Poland & Sweden
print(education_climate_poland)

## # A tibble: 23 × 3
##    region avg_climate_concern US_percentage
##    <chr>                <dbl>         <dbl>
##  1 PL11                  2.59         0.539
##  2 PL12                  2.82         0.843
##  3 PL21                  2.86         1.37 
##  4 PL22                  3.13         1.95 
##  5 PL31                  2.74         0.304
##  6 PL32                  2.47         0.456
##  7 PL33                  2.4          0.166
##  8 PL34                  2.78         0.221
##  9 PL41                  3.10         1.29 
## 10 PL42                  3.08         0.415
## # ℹ 13 more rows

print(education_climate_sweden)

## # A tibble: 29 × 3
##    region avg_climate_concern US_percentage
##    <chr>                <dbl>         <dbl>
##  1 SE11                  3.23         1.53 
##  2 SE110                 2.87         0.885
##  3 SE12                  3.07         1.69 
##  4 SE121                 2.87         0.180
##  5 SE122                 2.74         0.263
##  6 SE123                 2.85         0.428
##  7 SE124                 2.95         0.235
##  8 SE125                 2.55         0.180
##  9 SE21                  3.04         0.677
## 10 SE211                 2.69         0.111
## # ℹ 19 more rows

library(ggplot2)

poland_plot <- ggplot(education_climate_poland, aes(x = US_percentage, y = avg_climate_concern, label = region)) +
  geom_point(color = "red", size = 3) +
  geom_text(vjust = 1.5, hjust = 1, size = 3) +
  labs(
    title = "Education Level vs Climate Concern in Poland",
    x = "Percentage of Upper Secondary Education",
    y = "Average Climate Concern"
  ) +
  theme_minimal()

poland_plot1 <- ggplot(education_climate_polandt, aes(x = US_percentage, y = avg_climate_concern, label = region)) +
  geom_point(color = "red", size = 3) +
  geom_text(vjust = 1.5, hjust = 1, size = 3) +
  labs(
    title = "Education Level vs Climate Concern in Poland",
    x = "Percentage of tertiary Education",
    y = "Average Climate Concern"
  ) +
  theme_minimal()

poland_plot2 <- ggplot(education_climate_polandu, aes(x = US_percentage, y = avg_climate_concern, label = region)) +
  geom_point(color = "red", size = 3) +
  geom_text(vjust = 1.5, hjust = 1, size = 3) +
  labs(
    title = "Education Level vs Climate Concern in Poland",
    x = "Percentage of Low Education",
    y = "Average Climate Concern"
  ) +
  theme_minimal()

print(poland_plot)

print(poland_plot1)

sweden_plot <- ggplot(education_climate_sweden, aes(x = US_percentage, y = avg_climate_concern, label = region)) +
  geom_point(color = "blue", size = 3) +
  geom_text(vjust = 1.5, hjust = 1, size = 3) +
  labs(
    title = "Education Level vs Climate Concern in Sweden",
    x = "Percentage of Upper seonndary Education",
    y = "Average Climate Concern"
  ) +
  theme_minimal()
# Scatter plot for Tertiary Education vs Climate Concern in Sweden
sweden_plot1 <- ggplot(education_climate_swedent, aes(x = US_percentage, y = avg_climate_concern, label = region)) +
  geom_point(color = "blue", size = 3) +
  geom_text(vjust = 1.5, hjust = 1, size = 3) +
  labs(
    title = "Education Level vs Climate Concern in Sweden",
    x = "Percentage of Tertiary Education",
    y = "Average Climate Concern"
  ) +
  theme_minimal()

# Scatter plot for Lower Secondary Education vs Climate Concern in Sweden
sweden_plot2 <- ggplot(education_climate_swedenu, aes(x = US_percentage, y = avg_climate_concern, label = region)) +
  geom_point(color = "blue", size = 3) +
  geom_text(vjust = 1.5, hjust = 1, size = 3) +
  labs(
    title = "Education Level vs Climate Concern in Sweden",
    x = "Percentage of Low Education",
    y = "Average Climate Concern"
  ) +
  theme_minimal()

# Print plots for Sweden
print(sweden_plot)

print(sweden_plot1)

Initial Analysis

Madeline Pirkey

2025-03-10