Rows: 3160 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): state_name, county_fips, county_name
dbl (7): votes_gop, votes_dem, total_votes, diff, per_gop, per_dem, per_poin...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
the_caption ="Source: data from tonmcg and CDC; analysis by freerangestats.info"combined |>ggplot(aes(x= cpe, y = per_gop)) +geom_point(colour ="steelblue", alpha =0.5) +geom_smooth(method ="lm", fill ="black", colour ="white", alpha =0.8) +scale_x_continuous(label = percent) +scale_y_continuous(label = percent) +labs(x ="Crude prevalence estimate of depression",y ="Percentage vote for Trump in 2024 election",subtitle ="Line is ordinary least squares fit to all county data together",title ="Counties with more depression voted more for Trump",caption = the_caption)
preds2 <-predict(quasi_model, type ="response", se.fit =TRUE)# draw chart:combined |>mutate(fit = preds2$fit,se = preds2$se.fit,lower = fit -1.96* se,upper = fit +1.96* se) |>ggplot(aes(x = cpe, y = per_gop)) +geom_point(colour ="steelblue", alpha =0.5) +geom_ribbon(aes(ymin = lower, ymax = upper), fill ="black", alpha =0.5) +geom_line(aes(y = fit), colour ="white") +theme(legend.position ="none") +scale_y_continuous(limits =c(0, 1), label = percent) +scale_x_continuous(label = percent) +labs(x ="Crude prevalence estimate of depression",y ="Percentage vote for Trump in 2024 election",subtitle ="Line is generalized linear model with quasibinomial response, fit to all county data together",title ="Counties with more depression voted more for Trump",caption = the_caption)
Reading layer `cb_2023_us_county_500k' from data source
`/Users/dongshuhao/Desktop/trump/dataset/cb_2023_us_county_500k/cb_2023_us_county_500k.shp'
using driver `ESRI Shapefile'
Simple feature collection with 3235 features and 12 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -179.1467 ymin: -14.5487 xmax: 179.7785 ymax: 71.38782
Geodetic CRS: NAD83
# 提取质心county_cent <- counties |>st_centroid()
Warning: st_centroid assumes attributes are constant over geometries
sc <-st_coordinates(county_cent) # X和Y坐标county_cent <- county_cent |>mutate(x = sc[, 1],y = sc[, 2],# combine the two digit state code with the 3 digit county code:county_fips =paste0(STATEFP, COUNTYFP))# 合并数据并检查combined2 <- combined |>left_join(county_cent, by ="county_fips")ggplot(combined2, aes(x = x, y = y, colour = state_name)) +geom_point() +theme(legend.position ="none") +coord_map() +labs(title ="Centres of counties after merging data")
# 拟合模型model6 <-gam(per_gop ~ cpe +s(x, y) +s(state_name, bs ='re') , family = quasibinomial, weights = total_votes,data = combined2)# the spatial rubber mat that is correcting for spatial correlation for us;# scheme=1 is what makes it draw a perspective plot rather than contour or# heatmap):plot(model6, select =1, scheme =1, main ="Higher vote for GOP")
绘制预期
preds6 <-predict(model6, se.fit =TRUE, type ="response")combined2 |>mutate(fit = preds6$fit,se = preds6$se.fit,lower = fit -1.96* se,upper = fit +1.96* se) |>ggplot(aes(x = cpe, group = state_name)) +geom_point(aes(y = per_gop, colour = state_name), alpha =0.5) +geom_ribbon(aes(ymin = lower, ymax = upper), fill ="black", alpha =0.5) +theme(legend.position ="none") +scale_y_continuous(limits =c(0, 1), label = percent) +scale_x_continuous(label = percent) +labs(x ="Crude prevalence estimate of depression",y ="Percentage vote for Trump in 2024 election",subtitle ="Grey ribbons are 95% confidence intervals from quasibinomial generalized additive model with spatial effect and state-level random intercept effect",title ="Counties with more depression voted more for Trump",caption = the_caption) +facet_wrap(~state_name)
Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
-Inf
Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
-Inf