NR_overview

Author

VG Team

Code

source("scripts/nr_regressions.R")

Easy boxplots

Code

ggplot(data=complete_data) +
  geom_boxplot(aes(x=as.factor(gender_male), y=NR_score, group=as.factor(gender_male), fill=as.factor(gender_male))) +
  labs(fill="Gender",
       x="")

Code

ggplot(data=complete_data) +
  geom_boxplot(aes(x=as.factor(urban_rural), y=NR_score, group=as.factor(urban_rural), fill=as.factor(urban_rural))) +
  labs(fill="Urban/Rural",
       x="")

NR Distribution

Code

nr_levels <- sort(unique(complete_data$NR_score))
colors <- colorRampPalette(c("red", "yellow", "darkgreen"))(length(nr_levels))

mean_score <- mean(complete_data$NR_score, na.rm = TRUE)
median_score <- median(complete_data$NR_score, na.rm = TRUE)

ggplot(data = complete_data) +
  geom_bar(aes(x = NR_score, fill = as.factor(NR_score))) +
  geom_vline(xintercept = median_score, linetype = "dashed", color = "black", linewidth = 1) +
  scale_fill_manual(
    values = setNames(colors, nr_levels),
    name = "NR Score"
  ) +
  guides(fill = "none") +
  theme_minimal() +
  theme(
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10)
  ) +
  labs(x = "NR Score", y = "Count")

NR Distribution by Voting

Code

rel_data <- complete_data %>%
  filter(!is.na(NR_score), !is.na(voting)) %>%
  count(voting, NR_score) %>%
  group_by(voting) %>%
  mutate(share = n / sum(n)) %>%
  ungroup()

medians <- complete_data %>%
  filter(!is.na(NR_score), !is.na(voting)) %>%
  group_by(voting) %>%
  summarize(median_score = median(NR_score))

# Step 2: Define color palette
nr_levels <- sort(unique(complete_data$NR_score))
colors <- colorRampPalette(c("red", "yellow", "darkgreen"))(length(nr_levels))

# Step 3: Plot
ggplot(rel_data) +
  geom_bar(aes(x = factor(NR_score), y = share, fill = factor(NR_score)), stat = "identity") +
  scale_fill_manual(values = setNames(colors, nr_levels)) +
  facet_wrap(~ voting) +
  labs(x = "NR Score", y = "Relative Share") +
  guides(fill = "none", x="none") +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Correlation Plots

Code

complete_data <- complete_data %>% filter(birthyear <= 2010,
                                          cv <= 20000) %>% 
  mutate(birthyear_mean = birthyear - mean(birthyear, na.rm=TRUE))


# Plot correlation between age and NR-score
ggplot(data=complete_data) +
  geom_jitter(aes(x=lifesat_recode, y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=lifesat_recode, y=NR_score), color="red", method="lm", se=TRUE) +
  labs(x="Life Satisfaction", y="NR Score") +
  theme_minimal()

Code

# Plot correlation between health phys and NR-score
ggplot(data=complete_data) +
  geom_jitter(aes(x=healthphys_recode, y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthphys_recode, y=NR_score), color="red", method="lm", se=TRUE) +
  labs(x="Health Physical", y="NR Score") +
  theme_minimal()

Code

# Plot correlation between health psych and NR-score
ggplot(data=complete_data) +
  geom_jitter(aes(x=healthpsych_recode, y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthpsych_recode, y=NR_score), color="red", method="lm", se=TRUE) +
  labs(x="Health Psychological", y="NR Score") +
  theme_minimal()

Code

ggplot(data=complete_data) +
  geom_jitter(aes(x=log(cv+1), y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=log(cv+1), y=NR_score), color="red", method="lm", se=TRUE) +
  labs(x="CV (log +1)", y="NR Score") +
  theme_minimal()

Code

ggplot(data=complete_data) +
  geom_jitter(aes(x=healthpsych_recode, y=lifesat_recode), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthpsych_recode, y=lifesat_recode), color="red", method="lm", se=TRUE) +
  labs(x="Health Psychological", y="Lifesat") +
  theme_minimal()

Code

ggplot(data=complete_data) +
  geom_jitter(aes(x=healthphys_recode, y=lifesat_recode), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthphys_recode, y=lifesat_recode), color="red", method="lm", se=TRUE) +
  labs(x="Health Physical", y="Lifesat") +
  theme_minimal()

Code

ggplot(data=complete_data) +
  geom_jitter(aes(x=healthpsych_recode, y=healthphys_recode), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthpsych_recode, y=healthphys_recode), color="red", method="lm", se=TRUE) +
  labs(x="Health Psychological", y="Health Physical") +
  theme_minimal()

Code

ggplot(data=complete_data) +
  geom_jitter(aes(x=healthpsych_recode, y=log(cv+1)), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthpsych_recode, y=log(cv+1)), color="red", method="lm", se=TRUE) +
  labs(x="Health Psychological", y="CV (log+1)") +
  theme_minimal()

Code

ggplot(data=complete_data) +
  geom_jitter(aes(x=healthphys_recode, y=log(cv+1)), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthphys_recode, y=log(cv+1)), color="red", method="lm", se=TRUE) +
  labs(x="Health Physical", y="CV (log+1)") +
  theme_minimal()

Simple Regression

Code

texreg::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + healthphys_recode + healthpsych_recode + lifesat_recode + log(cv+1), data=complete_data), digits=3)

Statistical models
	Model 1
(Intercept)	3.124^***
	(0.031)
gender_male	-0.128^***
	(0.016)
birthyear_mean	-0.007^***
	(0.001)
healthphys_recode	0.018^***
	(0.005)
healthpsych_recode	-0.008
	(0.005)
lifesat_recode	0.014^**
	(0.005)
log(cv + 1)	0.146^***
	(0.004)
R²	0.120
Adj. R²	0.120
Num. obs.	10262
^*p < 0.001; ^p < 0.01; ^*p < 0.05

Code

sd(complete_data$NR_score, na.rm = TRUE)

[1] 0.8456547

Code

texreg::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + healthphys_recode + log(cv+1), data=complete_data), digits=3)

Statistical models
	Model 1
(Intercept)	3.134^***
	(0.028)
gender_male	-0.127^***
	(0.016)
birthyear_mean	-0.007^***
	(0.001)
healthphys_recode	0.022^***
	(0.004)
log(cv + 1)	0.147^***
	(0.004)
R²	0.120
Adj. R²	0.120
Num. obs.	10298
^*p < 0.001; ^p < 0.01; ^*p < 0.05

Code

texreg::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + healthpsych_recode + log(cv+1), data=complete_data), digits=3)

Statistical models
	Model 1
(Intercept)	3.197^***
	(0.028)
gender_male	-0.130^***
	(0.016)
birthyear_mean	-0.006^***
	(0.001)
healthpsych_recode	0.010^**
	(0.003)
log(cv + 1)	0.149^***
	(0.004)
R²	0.117
Adj. R²	0.117
Num. obs.	10317
^*p < 0.001; ^p < 0.01; ^*p < 0.05

Code

texreg::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + lifesat_recode + log(cv+1), data=complete_data), digits=3)

Statistical models
	Model 1
(Intercept)	3.146^***
	(0.028)
gender_male	-0.131^***
	(0.016)
birthyear_mean	-0.006^***
	(0.000)
lifesat_recode	0.020^***
	(0.004)
log(cv + 1)	0.146^***
	(0.004)
R²	0.119
Adj. R²	0.119
Num. obs.	10327
^*p < 0.001; ^p < 0.01; ^*p < 0.05

Check Spatial Clustering

Code

data_sf <- st_as_sf(data_sf, coords = c("lon", "lat"), crs = 4326)

## Nearest Neighbors
coords <- st_coordinates(data_sf)  # Extract coordinates
neighbors <- knearneigh(coords, k = 10)  # Create k-nearest neighbors (k=6)
nb_list <- knn2nb(neighbors)  # Convert to neighbor list

weights <- nb2listw(nb_list, style = "W", zero.policy = TRUE)


## Distance based
data_sf <- st_transform(data_sf, crs = 25832)
distance_threshold <- 20000  # in meters (50km)

coords <- st_coordinates(data_sf)
dist_matrix <- as.matrix(dist(coords))  # Compute all distances
nb_dist <- dnearneigh(coords, d1 = 0, d2 = distance_threshold)

weights_dist <- nb2listw(nb_dist, style = "W", zero.policy = TRUE)


print("Test for spatial autocorrelation of NR Distance")

[1] "Test for spatial autocorrelation of NR Distance"

Code

morans_test <- moran.test(data_sf$NR_score, listw = weights_dist, zero.policy = TRUE)
print(morans_test)


    Moran I test under randomisation

data:  data_sf$NR_score  
weights: weights_dist  
n reduced by no-neighbour observations  

Moran I statistic standard deviate = 0.878, p-value = 0.19
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     1.909860e-03     -9.433072e-05      5.210661e-06

Code

print("Test for spatial autocorrelation of NR K Nearest Neighbors")

[1] "Test for spatial autocorrelation of NR K Nearest Neighbors"

Code

morans_test <- moran.test(data_sf$NR_score, listw = weights, zero.policy = TRUE)
print(morans_test)


    Moran I test under randomisation

data:  data_sf$NR_score  
weights: weights    

Moran I statistic standard deviate = 4.6401, p-value = 1.741e-06
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     1.866652e-02     -9.429514e-05      1.634746e-05

Code

# Ensure same CRS for germany_sf and data_sf
data_sf <- st_transform(data_sf, crs = 32633)  # Example UTM projection (adjust as needed)
germany_sf <- st_transform(germany_sf, crs = 32633)
federal_sf <- st_transform(federal_sf, crs = 32633)

# Extract coordinates from data
coords <- st_coordinates(data_sf)

# Convert germany_sf to a spatstat window
germany_owin <- as.owin(germany_sf)  # Convert sf to Spatial first

# Create ppp object with Germany as window
points_ppp <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$NR_score)

# Perform Gaussian kernel density estimation
density_map <- Smooth.ppp(points_ppp, sigma = 35000, W = germany_owin)  # Adjust sigma for smoothness

# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")

# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "NR Score"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap NR") +
  coord_sf()

Code

### Life sat
data_sf <- data_sf %>% filter(!is.na(lifesat_recode))
coords <- st_coordinates(data_sf)
points_ppp <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$lifesat_recode)

# Perform Gaussian kernel density estimation
density_map <- Smooth.ppp(points_ppp, sigma = 30000, W = germany_owin)  # Adjust sigma for smoothness

# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")

# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "Lifesat Index"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap Lifesat") +
  coord_sf()

Code

# Health Phys
data_sf <- data_sf %>% filter(!is.na(healthphys_recode))
coords <- st_coordinates(data_sf)
points_ppp <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$healthphys_recode)

# Perform Gaussian kernel density estimation
density_map <- Smooth.ppp(points_ppp, sigma = 30000, W = germany_owin)  # Adjust sigma for smoothness
# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")

# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "Health Phys Index"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap Health Phys") +
  coord_sf()

Code

# Health Psych
data_sf <- data_sf %>% filter(!is.na(healthpsych_recode))
coords <- st_coordinates(data_sf)
points_ppp <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$healthpsych_recode)

# Perform Gaussian kernel density estimation
density_map <- Smooth.ppp(points_ppp, sigma = 30000, W = germany_owin)  # Adjust sigma for smoothness
# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")
# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "Health Psych Index"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap Health Psych") +
  coord_sf()

Code

density_map <- Smooth.ppp(points_ppp, sigma = 18000, W = germany_owin)  # Adjust sigma for smoothness
# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")
# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "Health Psych Index"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap Health Psych (less smoothing)") +
  coord_sf()

Code

# Birthyear
data_sf <- data_sf %>% filter(!is.na(birthyear))
coords <- st_coordinates(data_sf)
points_ppp <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$birthyear)
# Perform Gaussian kernel density estimation
density_map <- Smooth.ppp(points_ppp, sigma = 25000, W = germany_owin)  # Adjust sigma for smoothness
# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")

# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "Average Birthyear"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap Birthyear") +
  coord_sf()