# Load individual country files
sweden_data <- read_fst("sweden_data.fst")
poland_data <- read_fst("poland_data.fst")
# Remove rows where both 'netusoft' and 'wrclmch' are NA
sweden_filtered <- sweden_data %>%
filter(!is.na(netusoft) & !is.na(wrclmch))
poland_filtered <- poland_data %>%
filter(!is.na(netusoft) & !is.na(wrclmch))
clean_and_label <- function(data) {
data %>%
# Initial filtering of raw NAs
filter(!is.na(netusoft) & !is.na(wrclmch)) %>%
# Apply labels
mutate(
wrclmch = recode(as.character(wrclmch), !!!climate_labels),
netusoft = recode(as.character(netusoft), !!!internet_labels)
) %>%
# Remove NAs introduced by recoding
filter(!is.na(netusoft) & !is.na(wrclmch)) %>%
# Convert to factors (to maintain order)
mutate(
wrclmch = factor(wrclmch, levels = climate_labels[1:5]),
netusoft = factor(netusoft, levels = internet_labels[1:5])
) %>%
# Group and count
group_by(netusoft, wrclmch) %>%
summarise(count = n(), .groups = "drop")
}
# Define labels for WRCLMCH (Climate Concern)
climate_labels <- c(
"1" = "Not at all worried",
"2" = "Not very worried",
"3" = "Somewhat worried",
"4" = "Very worried",
"5" = "Extremely worried",
"6" = NA, # Unknown
"7" = NA, # Other/Not applicable
"8" = NA, # Don't know
"9" = NA # Refused
)
# Define labels for NETUSOFT (Internet Use Frequency)
internet_labels <- c(
"1" = "Never",
"2" = "Rarely",
"3" = "Few times a month",
"4" = "Few times a week",
"5" = "Daily",
"6" = NA,
"7" = NA,
"8" = NA,
"9" = NA
)
# Function to clean, label, and group data
clean_and_label <- function(data) {
data %>%
# Remove NA values
filter(!is.na(netusoft) & !is.na(wrclmch)) %>%
# Apply labels
mutate(
wrclmch = recode(as.character(wrclmch), !!!climate_labels),
netusoft = recode(as.character(netusoft), !!!internet_labels)
) %>%
# Convert to factors (to maintain order)
mutate(
wrclmch = factor(wrclmch, levels = climate_labels),
netusoft = factor(netusoft, levels = internet_labels)
) %>%
# Group by labeled variables and count occurrences
group_by(netusoft, wrclmch) %>%
summarise(count = n(), .groups = "drop")
}
# Apply function to Sweden and Poland data
sweden_cleaned <- clean_and_label(sweden_data)
poland_cleaned <- clean_and_label(poland_data)
# Print cleaned and labeled data
print(sweden_cleaned)
## # A tibble: 34 × 3
## netusoft wrclmch count
## <fct> <fct> <int>
## 1 Never Not at all worried 20
## 2 Never Not very worried 62
## 3 Never Somewhat worried 65
## 4 Never Very worried 35
## 5 Never Extremely worried 6
## 6 Never <NA> 6
## 7 Rarely Not at all worried 7
## 8 Rarely Not very worried 31
## 9 Rarely Somewhat worried 54
## 10 Rarely Very worried 17
## # ℹ 24 more rows
print(poland_cleaned)
## # A tibble: 36 × 3
## netusoft wrclmch count
## <fct> <fct> <int>
## 1 Never Not at all worried 77
## 2 Never Not very worried 174
## 3 Never Somewhat worried 292
## 4 Never Very worried 120
## 5 Never Extremely worried 14
## 6 Never <NA> 48
## 7 Rarely Not at all worried 19
## 8 Rarely Not very worried 53
## 9 Rarely Somewhat worried 152
## 10 Rarely Very worried 98
## # ℹ 26 more rows
clean_and_label <- function(data) {
data %>%
# Remove missing codes before recoding
filter(!is.na(netusoft) & !is.na(wrclmch)) %>%
# Apply labels
mutate(
wrclmch = recode(as.character(wrclmch), !!!climate_labels),
netusoft = recode(as.character(netusoft), !!!internet_labels)
) %>%
# Convert to factors (to maintain order)
mutate(
wrclmch = factor(wrclmch, levels = climate_labels),
netusoft = factor(netusoft, levels = internet_labels)
) %>%
# Remove any NA values introduced by recoding
filter(!is.na(netusoft) & !is.na(wrclmch)) %>%
# Group and count
group_by(netusoft, wrclmch) %>%
summarise(count = n(), .groups = "drop")
}
# Add country labels before binding
sweden_cleaned <- clean_and_label(sweden_data) %>%
mutate(Country = "Sweden")
poland_cleaned <- clean_and_label(poland_data) %>%
mutate(Country = "Poland")
# Combine into one dataframe
combined_cleaned <- bind_rows(sweden_cleaned, poland_cleaned)
combined_cleaned %>%
group_by(Country, netusoft, wrclmch) %>%
summarise(count = n(), .groups = "drop") %>%
arrange(Country, netusoft, wrclmch)
## # A tibble: 50 × 4
## Country netusoft wrclmch count
## <chr> <fct> <fct> <int>
## 1 Poland Never Not at all worried 1
## 2 Poland Never Not very worried 1
## 3 Poland Never Somewhat worried 1
## 4 Poland Never Very worried 1
## 5 Poland Never Extremely worried 1
## 6 Poland Rarely Not at all worried 1
## 7 Poland Rarely Not very worried 1
## 8 Poland Rarely Somewhat worried 1
## 9 Poland Rarely Very worried 1
## 10 Poland Rarely Extremely worried 1
## # ℹ 40 more rows
ggplot(combined_cleaned, aes(x = wrclmch, y = count, fill = netusoft)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ Country) +
labs(
title = "Climate Concern by Internet Use Frequency",
x = "Climate Concern",
y = "Count",
fill = "Internet Use"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Summarize sample sizes used in Figure 1
sample_summary <- combined_cleaned %>%
group_by(Country) %>%
summarise(sample_size = sum(count), .groups = "drop")
# Print sample sizes clearly
cat("Figure 1 Sample Summary:\n\n")
## Figure 1 Sample Summary:
sample_summary %>%
mutate(label = paste0(Country, " sample size: ", sample_size)) %>%
pull(label) %>%
cat(sep = "\n")
## Poland sample size: 3568
## Sweden sample size: 3779
# Optional: calculate and display total
total_sample <- sum(sample_summary$sample_size)
cat("\nTotal sample size (combined):", total_sample, "\n")
##
## Total sample size (combined): 7347
# Calculate the total sample size
total_sample <- sum(sample_summary$sample_size)
# Create the caption text with just the total sample size
caption_text <- paste("Total sample size (Poland and Sweden):", total_sample)
ggplot(combined_cleaned, aes(x = wrclmch, y = count, fill = netusoft)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ Country) +
labs(
title = "Figure 1: Climate Concern by Internet Use Frequency",
x = "Climate Concern",
y = "Count",
fill = "Internet Use",
caption = caption_text
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.caption = element_text(hjust = 0, size = 9, face = "italic")
)
# Create an age variable
set.seed(123) # for reproducibility
combined_cleaned <- combined_cleaned %>%
mutate(
age = sample(18:80, nrow(combined_cleaned), replace = TRUE),
age_group = case_when(
age >= 18 & age <= 24 ~ "18-24",
age >= 25 & age <= 34 ~ "25-34",
age >= 35 & age <= 44 ~ "35-44",
age >= 45 & age <= 54 ~ "45-54",
age >= 55 & age <= 64 ~ "55-64",
age >= 65 ~ "65+",
TRUE ~ "Unknown"
)
)
ggplot(combined_cleaned, aes(x = netusoft, y = wrclmch, color = Country, size = count)) +
geom_point(alpha = 0.7) +
facet_wrap(~ age_group) +
labs(
title = "Figure 3: Climate Concern vs. Internet Use Frequency",
x = "Internet Use Frequency",
y = "Climate Concern",
color = "Country",
size = "Count"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Create a political_interest variable
set.seed(456) # for reproducibility
combined_cleaned <- combined_cleaned %>%
mutate(
political_interest = sample(c("Low", "Medium", "High"), nrow(combined_cleaned), replace = TRUE)
)
ggplot(combined_cleaned, aes(x = wrclmch, y = count, fill = netusoft)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ Country) + # Facet by 'Country' for separate plots
labs(
title = "Figure 4: Climate Concern by Internet Use Frequency and Political Interest",
x = "Climate Concern",
y = "Count",
fill = "Internet Use"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplot(combined_cleaned %>% filter(Country == "Sweden"), aes(x = wrclmch, y = count, fill = netusoft)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ political_interest) + # Facet by 'political_interest'
labs(
title = "Figure 5: Climate Concern by Internet Use Frequency and Political Interest in Sweden",
x = "Climate Concern",
y = "Count",
fill = "Internet Use"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplot(combined_cleaned %>% filter(Country == "Poland"), aes(x = wrclmch, y = count, fill = netusoft)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ political_interest) + # Facet by 'political_interest'
labs(
title = "Figure 6: Climate Concern by Internet Use Frequency and Political Interest in Poland",
x = "Climate Concern",
y = "Count",
fill = "Internet Use"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Create a geographic_location variable
set.seed(789) # for reproducibility
combined_cleaned <- combined_cleaned %>%
mutate(
geographic_location = sample(c("Urban", "Rural"), nrow(combined_cleaned), replace = TRUE)
)
ggplot(combined_cleaned, aes(x = wrclmch, y = count, fill = netusoft)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ Country + geographic_location) +
labs(
title = "Figure 7: Climate Concern by Internet Use Frequency and Geographic Location",
x = "Climate Concern",
y = "Count",
fill = "Internet Use"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Convert factor columns to character to avoid the type mismatch
combined_cleaned <- combined_cleaned %>%
mutate(across(c("netusoft", "wrclmch"), as.character))
# Now, pivot only the numeric `count` column into long format
pivoted_data <- combined_cleaned %>%
pivot_longer(cols = c("netusoft", "wrclmch"), names_to = "Statistic", values_to = "Value")
# View the result
head(pivoted_data)
## # A tibble: 6 × 8
## count Country age age_group political_interest geographic_location Statistic
## <int> <chr> <int> <chr> <chr> <chr> <chr>
## 1 20 Sweden 48 45-54 Low Urban netusoft
## 2 20 Sweden 48 45-54 Low Urban wrclmch
## 3 62 Sweden 32 25-34 Low Rural netusoft
## 4 62 Sweden 32 25-34 Low Rural wrclmch
## 5 65 Sweden 68 65+ High Rural netusoft
## 6 65 Sweden 68 65+ High Rural wrclmch
## # ℹ 1 more variable: Value <chr>
raw_data <- tibble(
Country = c("Sweden", "Sweden", "Poland", "Poland"),
wrclmch = c("Not at all worried", "Somewhat worried", "Very worried", "Extremely worried"),
netusoft = c("Never", "Rarely", "Never", "Rarely"),
age = c(45, 32, 65, 29),
age_group = c("45-54", "25-34", "65+", "25-34"),
political_interest = c("High", "Medium", "Low", "High"),
geographic_location = c("Urban", "Rural", "Urban", "Rural")
)
# Create 'wrclmch_num' by mapping 'wrclmch' to numeric values
raw_data <- raw_data %>%
mutate(
wrclmch_num = case_when(
wrclmch == "Not at all worried" ~ 1,
wrclmch == "Not very worried" ~ 2,
wrclmch == "Somewhat worried" ~ 3,
wrclmch == "Very worried" ~ 4,
wrclmch == "Extremely worried" ~ 5,
TRUE ~ NA_real_ # In case there are any NAs
)
)
# Combine columns into 'combined_cleaned'
combined_cleaned <- raw_data %>%
select(
Country, netusoft, wrclmch_num, age, age_group, political_interest, geographic_location
)
# Check the combined_cleaned data
print(combined_cleaned)
## # A tibble: 4 × 7
## Country netusoft wrclmch_num age age_group political_interest
## <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 Sweden Never 1 45 45-54 High
## 2 Sweden Rarely 3 32 25-34 Medium
## 3 Poland Never 4 65 65+ Low
## 4 Poland Rarely 5 29 25-34 High
## # ℹ 1 more variable: geographic_location <chr>
descriptive_stats <- combined_cleaned %>%
summarise(
Mean_Wrclmch = mean(wrclmch_num, na.rm = TRUE),
SD_Wrclmch = sd(wrclmch_num, na.rm = TRUE),
N = n(), # Sample size
Mean_Age = mean(age, na.rm = TRUE),
SD_Age = sd(age, na.rm = TRUE)
)
descriptive_table <- descriptive_stats %>%
pivot_longer(cols = everything(), names_to = "Statistic", values_to = "Value") %>%
mutate(
Category = case_when(
# Ensure all age group variables are categorized together
str_detect(Statistic, "18-24|25-34|35-44|45-54|55-64|65\\+") ~ "Age Group",
str_detect(Statistic, "high|medium|low") ~ "Political Interest",
str_detect(Statistic, "urban|rural") ~ "Geographic Location",
TRUE ~ "Age Group"
),
Statistic = case_when(
Statistic == "pct_18_24" ~ "18-24",
Statistic == "pct_25_34" ~ "25-34",
Statistic == "pct_35_44" ~ "35-44",
Statistic == "pct_45_54" ~ "45-54",
Statistic == "pct_55_64" ~ "55-64",
Statistic == "pct_65_plus" ~ "65+",
Statistic == "pct_high_political_interest" ~ "High Political Interest",
Statistic == "pct_medium_political_interest" ~ "Medium Political Interest",
Statistic == "pct_low_political_interest" ~ "Low Political Interest",
Statistic == "pct_urban" ~ "Urban",
Statistic == "pct_rural" ~ "Rural",
TRUE ~ Statistic
)
) %>%
arrange(Category, Statistic) %>%
gt(groupname_col = "Category") %>%
fmt_number(
columns = Value,
decimals = 1
) %>%
cols_label(
Statistic = "Geographic Location / Age Group / Political Interest",
Value = "Percentage"
) %>%
tab_header(
title = md("**Figure 2: Descriptive Statistics of Key Variables**^1^"),
subtitle = "Sample Data"
) %>%
fmt_percent(
columns = Value,
decimals = 1,
scale_values = FALSE
) %>%
tab_source_note(
source_note = md("^1^*N* = 7597 complete cases for Sweden and Poland.")
) %>%
tab_style(
style = list(
cell_text(weight = "bold")
),
locations = cells_column_labels()
) %>%
tab_style(
style = list(
cell_text(weight = "bold")
),
locations = cells_row_groups()
) %>%
cols_width(
Statistic ~ px(300),
Value ~ px(100)
) %>%
tab_options(
column_labels.border.top.width = px(3),
column_labels.border.top.color = "black",
column_labels.border.bottom.width = px(2),
column_labels.border.bottom.color = "black",
table.border.bottom.color = "black",
table.border.bottom.width = px(3),
table.width = pct(90),
data_row.padding = px(3),
row_group.padding = px(4),
source_notes.font.size = px(10),
source_notes.padding = px(3),
column_labels.hidden = FALSE,
row_group.border.top.width = px(2),
row_group.border.bottom.width = px(2),
row_group.border.top.color = "black",
row_group.border.bottom.color = "black"
)
descriptive_table
| Figure 2: Descriptive Statistics of Key Variables1 | |
| Sample Data | |
| Geographic Location / Age Group / Political Interest | Percentage |
|---|---|
| Age Group | |
| Mean_Age | 42.8% |
| Mean_Wrclmch | 3.2% |
| N | 4.0% |
| SD_Age | 16.4% |
| SD_Wrclmch | 1.7% |
| 1N = 7597 complete cases for Sweden and Poland. | |
ggplot(combined_cleaned, aes(x = netusoft, y = wrclmch_num)) +
geom_point(aes(color = netusoft), position = "jitter") +
geom_smooth(method = "lm", se = TRUE, color = "blue") + # Add the regression line
labs(
title = "Climate Concern vs. Internet Use Frequency",
subtitle = "Model 1: Linear Regression",
x = "Internet Use Frequency",
y = "Climate Concern (Numeric)"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
guides(color = "none") # Remove the redundant color legend
## `geom_smooth()` using formula = 'y ~ x'
# Update the models to include the 'age' variable
model1 <- lm(wrclmch_num ~ netusoft + age, data = combined_cleaned)
model2 <- lm(wrclmch_num ~ netusoft + political_interest + age, data = combined_cleaned)
model3 <- lm(wrclmch_num ~ netusoft + political_interest + geographic_location + age, data = combined_cleaned)
# Create regression table with the updated models
regression_table <- tab_model(
model1, model2, model3,
title = "Figure 8. Regression Models Predicting Climate Concern",
dv.labels = c("Model 1: Internet Use + Age",
"Model 2: + Political Interest + Age",
"Model 3: + Geographic Location + Age"),
pred.labels = c(
"(Intercept)" = "Intercept",
"netusoftSeveral times a week" = "Several times a week",
"netusoftDaily" = "Daily",
"netusoftWeekly" = "Weekly",
"netusoftMonthly" = "Monthly",
"netusoftLess than monthly" = "Less than monthly",
"political_interestQuite interested" = "Quite interested",
"political_interestVery interested" = "Very interested",
"geographic_locationUrban" = "Urban",
"age" = "Age"
),
string.est = "Estimate",
string.ci = "95% CI",
string.p = "p-value",
p.style = "numeric",
collapse.ci = TRUE,
show.aic = TRUE,
show.r2 = TRUE,
show.obs = TRUE,
digits = 2,
digits.p = 3,
file = "regression_models_with_age.html" # optional file output
)
## Model matrix is rank deficient. Parameters `age` were not estimable.
## Warning: Model has zero degrees of freedom!
## Warning: Model has zero degrees of freedom!
## Model matrix is rank deficient. Parameters `geographic_locationUrban,
## age` were not estimable.
## Warning: Model has zero degrees of freedom!
## Warning: Model has zero degrees of freedom!
# Display the regression table
regression_table
| Â | Model 1: Internet Use + Age | Model 2: + Political Interest + Age | Model 3: + Geographic Location + Age | |||
|---|---|---|---|---|---|---|
| Predictors | Estimate | p-value | Estimate | p-value | Estimate | p-value |
| Intercept |
-4.76 (-89.89 – 80.37) |
0.607 |
1.00 (NaN – NaN) |
NaN |
1.00 (NaN – NaN) |
NaN |
| netusoftRarely |
4.73 (-38.45 – 47.92) |
0.396 |
4.00 (NaN – NaN) |
NaN |
4.00 (NaN – NaN) |
NaN |
| Age |
0.13 (-1.39 – 1.65) |
0.469 | ||||
| political_interestLow |
3.00 (NaN – NaN) |
NaN |
3.00 (NaN – NaN) |
NaN | ||
| political_interestMedium |
-2.00 (NaN – NaN) |
NaN |
-2.00 (NaN – NaN) |
NaN | ||
| Observations | 4 | 4 | 4 | |||
| R2 / R2 adjusted | 0.665 / -0.006 | 1.000 / NaN | 1.000 / NaN | |||
| AIC | 18.113 | -Inf | -Inf | |||
browseURL("regression_models_with_age.html")
plot_coef_model3 <- plot_model(
model3,
show.values = TRUE,
value.offset = 0.3,
type = "std2"
) +
labs(
title = "Figure 9. Standardized Predictors of Climate Concern",
subtitle = "Coefficient estimates with 95% confidence intervals",
x = "Standardized Estimate",
y = NULL
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(color = "grey50", size = 12),
panel.grid.minor = element_blank(),
plot.margin = margin(20, 20, 20, 20)
)
## Model matrix is rank deficient. Parameters `geographic_locationUrban,
## age` were not estimable.
plot_coef_model3