NR_overview

Author

VG Team

Code
source("scripts/nr_regressions.R")

Easy boxplots

Code
ggplot(data=complete_data) +
  geom_boxplot(aes(x=as.factor(gender_male), y=NR_score, group=as.factor(gender_male), fill=as.factor(gender_male))) +
  labs(fill="Gender",
       x="") 

Code
ggplot(data=complete_data) +
  geom_boxplot(aes(x=as.factor(urban_rural), y=NR_score, group=as.factor(urban_rural), fill=as.factor(urban_rural))) +
  labs(fill="Urban/Rural",
       x="") 

NR Distribution

Code
nr_levels <- sort(unique(complete_data$NR_score))
colors <- colorRampPalette(c("red", "yellow", "darkgreen"))(length(nr_levels))

mean_score <- mean(complete_data$NR_score, na.rm = TRUE)
median_score <- median(complete_data$NR_score, na.rm = TRUE)

ggplot(data = complete_data) +
  geom_bar(aes(x = NR_score, fill = as.factor(NR_score))) +
  geom_vline(xintercept = median_score, linetype = "dashed", color = "black", linewidth = 1) +
  scale_fill_manual(
    values = setNames(colors, nr_levels),
    name = "NR Score"
  ) +
  guides(fill = "none") +
  theme_minimal() +
  theme(
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 10)
  ) +
  labs(x = "NR Score", y = "Count")

NR Distribution by Voting

Code
rel_data <- complete_data %>%
  filter(!is.na(NR_score), !is.na(voting)) %>%
  count(voting, NR_score) %>%
  group_by(voting) %>%
  mutate(share = n / sum(n)) %>%
  ungroup()

medians <- complete_data %>%
  filter(!is.na(NR_score), !is.na(voting)) %>%
  group_by(voting) %>%
  summarize(median_score = median(NR_score))

# Step 2: Define color palette
nr_levels <- sort(unique(complete_data$NR_score))
colors <- colorRampPalette(c("red", "yellow", "darkgreen"))(length(nr_levels))

# Step 3: Plot
ggplot(rel_data) +
  geom_bar(aes(x = factor(NR_score), y = share, fill = factor(NR_score)), stat = "identity") +
  scale_fill_manual(values = setNames(colors, nr_levels)) +
  facet_wrap(~ voting) +
  labs(x = "NR Score", y = "Relative Share") +
  guides(fill = "none", x="none") +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Correlation Plots

Code
complete_data <- complete_data %>% filter(birthyear <= 2010,
                                          cv <= 20000) %>% 
  mutate(birthyear_mean = birthyear - mean(birthyear, na.rm=TRUE))


# Plot correlation between age and NR-score
ggplot(data=complete_data) +
  geom_jitter(aes(x=lifesat_recode, y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=lifesat_recode, y=NR_score), color="red", method="lm", se=TRUE) +
  labs(x="Life Satisfaction", y="NR Score") +
  theme_minimal()

Code
# Plot correlation between health phys and NR-score
ggplot(data=complete_data) +
  geom_jitter(aes(x=healthphys_recode, y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthphys_recode, y=NR_score), color="red", method="lm", se=TRUE) +
  labs(x="Health Physical", y="NR Score") +
  theme_minimal()

Code
# Plot correlation between health psych and NR-score
ggplot(data=complete_data) +
  geom_jitter(aes(x=healthpsych_recode, y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthpsych_recode, y=NR_score), color="red", method="lm", se=TRUE) +
  labs(x="Health Psychological", y="NR Score") +
  theme_minimal()

Code
ggplot(data=complete_data) +
  geom_jitter(aes(x=log(cv+1), y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=log(cv+1), y=NR_score), color="red", method="lm", se=TRUE) +
  labs(x="CV (log +1)", y="NR Score") +
  theme_minimal()

Code
ggplot(data=complete_data) +
  geom_jitter(aes(x=healthpsych_recode, y=lifesat_recode), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthpsych_recode, y=lifesat_recode), color="red", method="lm", se=TRUE) +
  labs(x="Health Psychological", y="Lifesat") +
  theme_minimal()

Code
ggplot(data=complete_data) +
  geom_jitter(aes(x=healthphys_recode, y=lifesat_recode), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthphys_recode, y=lifesat_recode), color="red", method="lm", se=TRUE) +
  labs(x="Health Physical", y="Lifesat") +
  theme_minimal()

Code
ggplot(data=complete_data) +
  geom_jitter(aes(x=healthpsych_recode, y=healthphys_recode), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthpsych_recode, y=healthphys_recode), color="red", method="lm", se=TRUE) +
  labs(x="Health Psychological", y="Health Physical") +
  theme_minimal()

Code
ggplot(data=complete_data) +
  geom_jitter(aes(x=healthpsych_recode, y=log(cv+1)), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthpsych_recode, y=log(cv+1)), color="red", method="lm", se=TRUE) +
  labs(x="Health Psychological", y="CV (log+1)") +
  theme_minimal()

Code
ggplot(data=complete_data) +
  geom_jitter(aes(x=healthphys_recode, y=log(cv+1)), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
  geom_smooth(aes(x=healthphys_recode, y=log(cv+1)), color="red", method="lm", se=TRUE) +
  labs(x="Health Physical", y="CV (log+1)") +
  theme_minimal()

Simple Regression

Code
texreg::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + healthphys_recode + healthpsych_recode + lifesat_recode + log(cv+1), data=complete_data), digits=3)
Statistical models
  Model 1
(Intercept) 3.124***
  (0.031)
gender_male -0.128***
  (0.016)
birthyear_mean -0.007***
  (0.001)
healthphys_recode 0.018***
  (0.005)
healthpsych_recode -0.008
  (0.005)
lifesat_recode 0.014**
  (0.005)
log(cv + 1) 0.146***
  (0.004)
R2 0.120
Adj. R2 0.120
Num. obs. 10262
***p < 0.001; **p < 0.01; *p < 0.05
Code
sd(complete_data$NR_score, na.rm = TRUE)
[1] 0.8456547
Code
texreg::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + healthphys_recode + log(cv+1), data=complete_data), digits=3)
Statistical models
  Model 1
(Intercept) 3.134***
  (0.028)
gender_male -0.127***
  (0.016)
birthyear_mean -0.007***
  (0.001)
healthphys_recode 0.022***
  (0.004)
log(cv + 1) 0.147***
  (0.004)
R2 0.120
Adj. R2 0.120
Num. obs. 10298
***p < 0.001; **p < 0.01; *p < 0.05
Code
texreg::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + healthpsych_recode + log(cv+1), data=complete_data), digits=3)
Statistical models
  Model 1
(Intercept) 3.197***
  (0.028)
gender_male -0.130***
  (0.016)
birthyear_mean -0.006***
  (0.001)
healthpsych_recode 0.010**
  (0.003)
log(cv + 1) 0.149***
  (0.004)
R2 0.117
Adj. R2 0.117
Num. obs. 10317
***p < 0.001; **p < 0.01; *p < 0.05
Code
texreg::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + lifesat_recode + log(cv+1), data=complete_data), digits=3)
Statistical models
  Model 1
(Intercept) 3.146***
  (0.028)
gender_male -0.131***
  (0.016)
birthyear_mean -0.006***
  (0.000)
lifesat_recode 0.020***
  (0.004)
log(cv + 1) 0.146***
  (0.004)
R2 0.119
Adj. R2 0.119
Num. obs. 10327
***p < 0.001; **p < 0.01; *p < 0.05

Check Spatial Clustering

Code
data_sf <- st_as_sf(data_sf, coords = c("lon", "lat"), crs = 4326)

## Nearest Neighbors
coords <- st_coordinates(data_sf)  # Extract coordinates
neighbors <- knearneigh(coords, k = 10)  # Create k-nearest neighbors (k=6)
nb_list <- knn2nb(neighbors)  # Convert to neighbor list

weights <- nb2listw(nb_list, style = "W", zero.policy = TRUE)


## Distance based
data_sf <- st_transform(data_sf, crs = 25832)
distance_threshold <- 20000  # in meters (50km)

coords <- st_coordinates(data_sf)
dist_matrix <- as.matrix(dist(coords))  # Compute all distances
nb_dist <- dnearneigh(coords, d1 = 0, d2 = distance_threshold)

weights_dist <- nb2listw(nb_dist, style = "W", zero.policy = TRUE)


print("Test for spatial autocorrelation of NR Distance")
[1] "Test for spatial autocorrelation of NR Distance"
Code
morans_test <- moran.test(data_sf$NR_score, listw = weights_dist, zero.policy = TRUE)
print(morans_test)

    Moran I test under randomisation

data:  data_sf$NR_score  
weights: weights_dist  
n reduced by no-neighbour observations  

Moran I statistic standard deviate = 0.878, p-value = 0.19
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     1.909860e-03     -9.433072e-05      5.210661e-06 
Code
print("Test for spatial autocorrelation of NR K Nearest Neighbors")
[1] "Test for spatial autocorrelation of NR K Nearest Neighbors"
Code
morans_test <- moran.test(data_sf$NR_score, listw = weights, zero.policy = TRUE)
print(morans_test)

    Moran I test under randomisation

data:  data_sf$NR_score  
weights: weights    

Moran I statistic standard deviate = 4.6401, p-value = 1.741e-06
alternative hypothesis: greater
sample estimates:
Moran I statistic       Expectation          Variance 
     1.866652e-02     -9.429514e-05      1.634746e-05 
Code
# Ensure same CRS for germany_sf and data_sf
data_sf <- st_transform(data_sf, crs = 32633)  # Example UTM projection (adjust as needed)
germany_sf <- st_transform(germany_sf, crs = 32633)
federal_sf <- st_transform(federal_sf, crs = 32633)

# Extract coordinates from data
coords <- st_coordinates(data_sf)

# Convert germany_sf to a spatstat window
germany_owin <- as.owin(germany_sf)  # Convert sf to Spatial first

# Create ppp object with Germany as window
points_ppp <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$NR_score)

# Perform Gaussian kernel density estimation
density_map <- Smooth.ppp(points_ppp, sigma = 35000, W = germany_owin)  # Adjust sigma for smoothness

# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")

# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "NR Score"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap NR") +
  coord_sf()

Code
### Life sat
data_sf <- data_sf %>% filter(!is.na(lifesat_recode))
coords <- st_coordinates(data_sf)
points_ppp <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$lifesat_recode)

# Perform Gaussian kernel density estimation
density_map <- Smooth.ppp(points_ppp, sigma = 30000, W = germany_owin)  # Adjust sigma for smoothness

# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")

# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "Lifesat Index"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap Lifesat") +
  coord_sf()

Code
# Health Phys
data_sf <- data_sf %>% filter(!is.na(healthphys_recode))
coords <- st_coordinates(data_sf)
points_ppp <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$healthphys_recode)

# Perform Gaussian kernel density estimation
density_map <- Smooth.ppp(points_ppp, sigma = 30000, W = germany_owin)  # Adjust sigma for smoothness
# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")

# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "Health Phys Index"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap Health Phys") +
  coord_sf()

Code
# Health Psych
data_sf <- data_sf %>% filter(!is.na(healthpsych_recode))
coords <- st_coordinates(data_sf)
points_ppp <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$healthpsych_recode)

# Perform Gaussian kernel density estimation
density_map <- Smooth.ppp(points_ppp, sigma = 30000, W = germany_owin)  # Adjust sigma for smoothness
# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")
# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "Health Psych Index"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap Health Psych") +
  coord_sf()

Code
density_map <- Smooth.ppp(points_ppp, sigma = 18000, W = germany_owin)  # Adjust sigma for smoothness
# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")
# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "Health Psych Index"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap Health Psych (less smoothing)") +
  coord_sf()

Code
# Birthyear
data_sf <- data_sf %>% filter(!is.na(birthyear))
coords <- st_coordinates(data_sf)
points_ppp <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$birthyear)
# Perform Gaussian kernel density estimation
density_map <- Smooth.ppp(points_ppp, sigma = 25000, W = germany_owin)  # Adjust sigma for smoothness
# Convert density raster to dataframe for ggplot
density_df <- as.data.frame(as.im(density_map))
colnames(density_df) <- c("x", "y", "density")

# Plot KDE map
ggplot() +
  geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
  scale_fill_gradientn(
    colours = c("red", "yellow", "darkgreen"),
    name = "Average Birthyear"
  ) +
  geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
  theme_minimal() +
  labs(title = "Heatmap Birthyear") +
  coord_sf()