Code
source("scripts/nr_regressions.R")
source("scripts/nr_regressions.R")
ggplot(data=complete_data) +
geom_boxplot(aes(x=as.factor(gender_male), y=NR_score, group=as.factor(gender_male), fill=as.factor(gender_male))) +
labs(fill="Gender",
x="")
ggplot(data=complete_data) +
geom_boxplot(aes(x=as.factor(urban_rural), y=NR_score, group=as.factor(urban_rural), fill=as.factor(urban_rural))) +
labs(fill="Urban/Rural",
x="")
<- sort(unique(complete_data$NR_score))
nr_levels <- colorRampPalette(c("red", "yellow", "darkgreen"))(length(nr_levels))
colors
<- mean(complete_data$NR_score, na.rm = TRUE)
mean_score <- median(complete_data$NR_score, na.rm = TRUE)
median_score
ggplot(data = complete_data) +
geom_bar(aes(x = NR_score, fill = as.factor(NR_score))) +
geom_vline(xintercept = median_score, linetype = "dashed", color = "black", linewidth = 1) +
scale_fill_manual(
values = setNames(colors, nr_levels),
name = "NR Score"
+
) guides(fill = "none") +
theme_minimal() +
theme(
legend.title = element_text(size = 12),
legend.text = element_text(size = 10)
+
) labs(x = "NR Score", y = "Count")
<- complete_data %>%
rel_data filter(!is.na(NR_score), !is.na(voting)) %>%
count(voting, NR_score) %>%
group_by(voting) %>%
mutate(share = n / sum(n)) %>%
ungroup()
<- complete_data %>%
medians filter(!is.na(NR_score), !is.na(voting)) %>%
group_by(voting) %>%
summarize(median_score = median(NR_score))
# Step 2: Define color palette
<- sort(unique(complete_data$NR_score))
nr_levels <- colorRampPalette(c("red", "yellow", "darkgreen"))(length(nr_levels))
colors
# Step 3: Plot
ggplot(rel_data) +
geom_bar(aes(x = factor(NR_score), y = share, fill = factor(NR_score)), stat = "identity") +
scale_fill_manual(values = setNames(colors, nr_levels)) +
facet_wrap(~ voting) +
labs(x = "NR Score", y = "Relative Share") +
guides(fill = "none", x="none") +
theme_minimal() +
theme(
strip.text = element_text(size = 12, face = "bold"),
axis.text.x = element_text(angle = 45, hjust = 1)
)
<- complete_data %>% filter(birthyear <= 2010,
complete_data <= 20000) %>%
cv mutate(birthyear_mean = birthyear - mean(birthyear, na.rm=TRUE))
# Plot correlation between age and NR-score
ggplot(data=complete_data) +
geom_jitter(aes(x=lifesat_recode, y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
geom_smooth(aes(x=lifesat_recode, y=NR_score), color="red", method="lm", se=TRUE) +
labs(x="Life Satisfaction", y="NR Score") +
theme_minimal()
# Plot correlation between health phys and NR-score
ggplot(data=complete_data) +
geom_jitter(aes(x=healthphys_recode, y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
geom_smooth(aes(x=healthphys_recode, y=NR_score), color="red", method="lm", se=TRUE) +
labs(x="Health Physical", y="NR Score") +
theme_minimal()
# Plot correlation between health psych and NR-score
ggplot(data=complete_data) +
geom_jitter(aes(x=healthpsych_recode, y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
geom_smooth(aes(x=healthpsych_recode, y=NR_score), color="red", method="lm", se=TRUE) +
labs(x="Health Psychological", y="NR Score") +
theme_minimal()
ggplot(data=complete_data) +
geom_jitter(aes(x=log(cv+1), y=NR_score), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
geom_smooth(aes(x=log(cv+1), y=NR_score), color="red", method="lm", se=TRUE) +
labs(x="CV (log +1)", y="NR Score") +
theme_minimal()
ggplot(data=complete_data) +
geom_jitter(aes(x=healthpsych_recode, y=lifesat_recode), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
geom_smooth(aes(x=healthpsych_recode, y=lifesat_recode), color="red", method="lm", se=TRUE) +
labs(x="Health Psychological", y="Lifesat") +
theme_minimal()
ggplot(data=complete_data) +
geom_jitter(aes(x=healthphys_recode, y=lifesat_recode), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
geom_smooth(aes(x=healthphys_recode, y=lifesat_recode), color="red", method="lm", se=TRUE) +
labs(x="Health Physical", y="Lifesat") +
theme_minimal()
ggplot(data=complete_data) +
geom_jitter(aes(x=healthpsych_recode, y=healthphys_recode), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
geom_smooth(aes(x=healthpsych_recode, y=healthphys_recode), color="red", method="lm", se=TRUE) +
labs(x="Health Psychological", y="Health Physical") +
theme_minimal()
ggplot(data=complete_data) +
geom_jitter(aes(x=healthpsych_recode, y=log(cv+1)), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
geom_smooth(aes(x=healthpsych_recode, y=log(cv+1)), color="red", method="lm", se=TRUE) +
labs(x="Health Psychological", y="CV (log+1)") +
theme_minimal()
ggplot(data=complete_data) +
geom_jitter(aes(x=healthphys_recode, y=log(cv+1)), width = 0.4, height = 0.1, alpha = 0.4, size = 2) +
geom_smooth(aes(x=healthphys_recode, y=log(cv+1)), color="red", method="lm", se=TRUE) +
labs(x="Health Physical", y="CV (log+1)") +
theme_minimal()
::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + healthphys_recode + healthpsych_recode + lifesat_recode + log(cv+1), data=complete_data), digits=3) texreg
Model 1 | |
---|---|
(Intercept) | 3.124*** |
(0.031) | |
gender_male | -0.128*** |
(0.016) | |
birthyear_mean | -0.007*** |
(0.001) | |
healthphys_recode | 0.018*** |
(0.005) | |
healthpsych_recode | -0.008 |
(0.005) | |
lifesat_recode | 0.014** |
(0.005) | |
log(cv + 1) | 0.146*** |
(0.004) | |
R2 | 0.120 |
Adj. R2 | 0.120 |
Num. obs. | 10262 |
***p < 0.001; **p < 0.01; *p < 0.05 |
sd(complete_data$NR_score, na.rm = TRUE)
[1] 0.8456547
::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + healthphys_recode + log(cv+1), data=complete_data), digits=3) texreg
Model 1 | |
---|---|
(Intercept) | 3.134*** |
(0.028) | |
gender_male | -0.127*** |
(0.016) | |
birthyear_mean | -0.007*** |
(0.001) | |
healthphys_recode | 0.022*** |
(0.004) | |
log(cv + 1) | 0.147*** |
(0.004) | |
R2 | 0.120 |
Adj. R2 | 0.120 |
Num. obs. | 10298 |
***p < 0.001; **p < 0.01; *p < 0.05 |
::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + healthpsych_recode + log(cv+1), data=complete_data), digits=3) texreg
Model 1 | |
---|---|
(Intercept) | 3.197*** |
(0.028) | |
gender_male | -0.130*** |
(0.016) | |
birthyear_mean | -0.006*** |
(0.001) | |
healthpsych_recode | 0.010** |
(0.003) | |
log(cv + 1) | 0.149*** |
(0.004) | |
R2 | 0.117 |
Adj. R2 | 0.117 |
Num. obs. | 10317 |
***p < 0.001; **p < 0.01; *p < 0.05 |
::htmlreg(lm(NR_score ~ gender_male + birthyear_mean + lifesat_recode + log(cv+1), data=complete_data), digits=3) texreg
Model 1 | |
---|---|
(Intercept) | 3.146*** |
(0.028) | |
gender_male | -0.131*** |
(0.016) | |
birthyear_mean | -0.006*** |
(0.000) | |
lifesat_recode | 0.020*** |
(0.004) | |
log(cv + 1) | 0.146*** |
(0.004) | |
R2 | 0.119 |
Adj. R2 | 0.119 |
Num. obs. | 10327 |
***p < 0.001; **p < 0.01; *p < 0.05 |
<- st_as_sf(data_sf, coords = c("lon", "lat"), crs = 4326)
data_sf
## Nearest Neighbors
<- st_coordinates(data_sf) # Extract coordinates
coords <- knearneigh(coords, k = 10) # Create k-nearest neighbors (k=6)
neighbors <- knn2nb(neighbors) # Convert to neighbor list
nb_list
<- nb2listw(nb_list, style = "W", zero.policy = TRUE)
weights
## Distance based
<- st_transform(data_sf, crs = 25832)
data_sf <- 20000 # in meters (50km)
distance_threshold
<- st_coordinates(data_sf)
coords <- as.matrix(dist(coords)) # Compute all distances
dist_matrix <- dnearneigh(coords, d1 = 0, d2 = distance_threshold)
nb_dist
<- nb2listw(nb_dist, style = "W", zero.policy = TRUE)
weights_dist
print("Test for spatial autocorrelation of NR Distance")
[1] "Test for spatial autocorrelation of NR Distance"
<- moran.test(data_sf$NR_score, listw = weights_dist, zero.policy = TRUE)
morans_test print(morans_test)
Moran I test under randomisation
data: data_sf$NR_score
weights: weights_dist
n reduced by no-neighbour observations
Moran I statistic standard deviate = 0.878, p-value = 0.19
alternative hypothesis: greater
sample estimates:
Moran I statistic Expectation Variance
1.909860e-03 -9.433072e-05 5.210661e-06
print("Test for spatial autocorrelation of NR K Nearest Neighbors")
[1] "Test for spatial autocorrelation of NR K Nearest Neighbors"
<- moran.test(data_sf$NR_score, listw = weights, zero.policy = TRUE)
morans_test print(morans_test)
Moran I test under randomisation
data: data_sf$NR_score
weights: weights
Moran I statistic standard deviate = 4.6401, p-value = 1.741e-06
alternative hypothesis: greater
sample estimates:
Moran I statistic Expectation Variance
1.866652e-02 -9.429514e-05 1.634746e-05
# Ensure same CRS for germany_sf and data_sf
<- st_transform(data_sf, crs = 32633) # Example UTM projection (adjust as needed)
data_sf <- st_transform(germany_sf, crs = 32633)
germany_sf <- st_transform(federal_sf, crs = 32633)
federal_sf
# Extract coordinates from data
<- st_coordinates(data_sf)
coords
# Convert germany_sf to a spatstat window
<- as.owin(germany_sf) # Convert sf to Spatial first
germany_owin
# Create ppp object with Germany as window
<- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$NR_score)
points_ppp
# Perform Gaussian kernel density estimation
<- Smooth.ppp(points_ppp, sigma = 35000, W = germany_owin) # Adjust sigma for smoothness
density_map
# Convert density raster to dataframe for ggplot
<- as.data.frame(as.im(density_map))
density_df colnames(density_df) <- c("x", "y", "density")
# Plot KDE map
ggplot() +
geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
scale_fill_gradientn(
colours = c("red", "yellow", "darkgreen"),
name = "NR Score"
+
) geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
theme_minimal() +
labs(title = "Heatmap NR") +
coord_sf()
### Life sat
<- data_sf %>% filter(!is.na(lifesat_recode))
data_sf <- st_coordinates(data_sf)
coords <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$lifesat_recode)
points_ppp
# Perform Gaussian kernel density estimation
<- Smooth.ppp(points_ppp, sigma = 30000, W = germany_owin) # Adjust sigma for smoothness
density_map
# Convert density raster to dataframe for ggplot
<- as.data.frame(as.im(density_map))
density_df colnames(density_df) <- c("x", "y", "density")
# Plot KDE map
ggplot() +
geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
scale_fill_gradientn(
colours = c("red", "yellow", "darkgreen"),
name = "Lifesat Index"
+
) geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
theme_minimal() +
labs(title = "Heatmap Lifesat") +
coord_sf()
# Health Phys
<- data_sf %>% filter(!is.na(healthphys_recode))
data_sf <- st_coordinates(data_sf)
coords <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$healthphys_recode)
points_ppp
# Perform Gaussian kernel density estimation
<- Smooth.ppp(points_ppp, sigma = 30000, W = germany_owin) # Adjust sigma for smoothness
density_map # Convert density raster to dataframe for ggplot
<- as.data.frame(as.im(density_map))
density_df colnames(density_df) <- c("x", "y", "density")
# Plot KDE map
ggplot() +
geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
scale_fill_gradientn(
colours = c("red", "yellow", "darkgreen"),
name = "Health Phys Index"
+
) geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
theme_minimal() +
labs(title = "Heatmap Health Phys") +
coord_sf()
# Health Psych
<- data_sf %>% filter(!is.na(healthpsych_recode))
data_sf <- st_coordinates(data_sf)
coords <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$healthpsych_recode)
points_ppp
# Perform Gaussian kernel density estimation
<- Smooth.ppp(points_ppp, sigma = 30000, W = germany_owin) # Adjust sigma for smoothness
density_map # Convert density raster to dataframe for ggplot
<- as.data.frame(as.im(density_map))
density_df colnames(density_df) <- c("x", "y", "density")
# Plot KDE map
ggplot() +
geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
scale_fill_gradientn(
colours = c("red", "yellow", "darkgreen"),
name = "Health Psych Index"
+
) geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
theme_minimal() +
labs(title = "Heatmap Health Psych") +
coord_sf()
<- Smooth.ppp(points_ppp, sigma = 18000, W = germany_owin) # Adjust sigma for smoothness
density_map # Convert density raster to dataframe for ggplot
<- as.data.frame(as.im(density_map))
density_df colnames(density_df) <- c("x", "y", "density")
# Plot KDE map
ggplot() +
geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
scale_fill_gradientn(
colours = c("red", "yellow", "darkgreen"),
name = "Health Psych Index"
+
) geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
theme_minimal() +
labs(title = "Heatmap Health Psych (less smoothing)") +
coord_sf()
# Birthyear
<- data_sf %>% filter(!is.na(birthyear))
data_sf <- st_coordinates(data_sf)
coords <- ppp(coords[,1], coords[,2], window = germany_owin, marks = data_sf$birthyear)
points_ppp # Perform Gaussian kernel density estimation
<- Smooth.ppp(points_ppp, sigma = 25000, W = germany_owin) # Adjust sigma for smoothness
density_map # Convert density raster to dataframe for ggplot
<- as.data.frame(as.im(density_map))
density_df colnames(density_df) <- c("x", "y", "density")
# Plot KDE map
ggplot() +
geom_raster(data = density_df, aes(x = x, y = y, fill = density)) +
scale_fill_gradientn(
colours = c("red", "yellow", "darkgreen"),
name = "Average Birthyear"
+
) geom_sf(data = federal_sf, fill = NA, color = "black", size = 0.8) +
theme_minimal() +
labs(title = "Heatmap Birthyear") +
coord_sf()