# Pick the input CSV from the working directory: prefer "incidents.csv" and
# fall back to "incidents (5).csv"; abort with a clear message if neither
# exists.
if (file.exists("incidents.csv")) {
  file_name <- "incidents.csv"
} else if (file.exists("incidents (5).csv")) {
  file_name <- "incidents (5).csv"
} else {
  stop(
    "File not found: incidents.csv or incidents (5).csv. ",
    "Please upload/rename your file.",
    call. = FALSE
  )
}
# Load the raw data from the file chosen above (comma-separated, first row
# holds the column names), then inspect its structure and summary statistics.
regression1 <- read.csv(file_name, header = TRUE, sep = ",")
str(regression1)
summary(regression1)
# Remove commas from population before converting it to numeric.
# NOTE(review): presumably the CSV stores population with thousands
# separators (e.g. "12,345") -- confirm against the file. Any value that is
# still non-numeric after stripping commas becomes NA (with a warning).
regression1$population <- as.numeric(gsub(",", "", regression1$population))
# Keep only the three columns used by the models below and preview the result.
regression2 <- regression1[, c("zone", "population", "incidents")]
head(regression2)
str(regression2)
# Linear model 1: incidents explained by population only.
reg.fit1 <- lm(incidents ~ population, data = regression2)
summary(reg.fit1)

# Linear model 2: additive effects of zone and population.
reg.fit2 <- lm(incidents ~ zone + population, data = regression2)
summary(reg.fit2)

# Linear model 2 with interaction: `zone * population` expands to both main
# effects plus the zone:population interaction term.
reg.fit2_int <- lm(incidents ~ zone * population, data = regression2)
summary(reg.fit2_int)
# Binary indicator for zone: 1 when zone is "west", 0 for any other value
# (NA stays NA).
regression2$zone_bin <- ifelse(regression2$zone == "west", 1, 0)

# Hand-built interaction term: population where zone_bin is 1, and 0 where
# zone_bin is 0.
# It is kept as a free-standing vector (not a column of regression2) because
# the model formulas further down refer to it by this name.
# NOTE(review): the name shadows base::interaction() as a variable; calls to
# interaction() still resolve to the function, but a more specific name would
# be clearer if the dependent formulas are renamed together.
interaction <- regression2$zone_bin * regression2$population
# Linear model 3: manual interaction term plus both main effects.
# `interaction` is not a column of regression2, so lm() resolves it from the
# environment in which the formula is created; population and zone_bin come
# from `data`.
reg.fit3 <- lm(
  incidents ~ interaction + population + zone_bin,
  data = regression2
)
summary(reg.fit3)

# Linear model 4: manual interaction term only (no main effects).
reg.fit4 <- lm(incidents ~ interaction, data = regression2)
summary(reg.fit4)
# Poisson rate model: offset(log(population)) enters the linear predictor with
# its coefficient fixed at 1, so the zone effect describes incidents per unit
# of population rather than raw counts. Requires population > 0.
model_glm <- glm(
  incidents ~ zone + offset(log(population)),
  data = regression2,
  family = poisson(link = "log")
)
summary(model_glm)

# Poisson count model without the offset: zone effect on raw incident counts.
model_simple_glm <- glm(
  incidents ~ zone,
  data = regression2,
  family = poisson(link = "log")
)
summary(model_simple_glm)

# Poisson rate model with a zone-by-population interaction.
# NOTE(review): population appears both as a covariate (via zone * population)
# and inside the offset -- confirm this is the intended specification.
model_int_glm <- glm(
  incidents ~ zone * population + offset(log(population)),
  data = regression2,
  family = poisson(link = "log")
)
summary(model_int_glm)
# Deviance-based pseudo-R^2 for the interaction Poisson model: the share of
# the null deviance that the fitted model accounts for.
# NOTE(review): this is not McFadden's pseudo-R^2, which is defined from
# log-likelihoods (1 - logLik(model) / logLik(null)).
pseudo_r2 <- 1 - (model_int_glm$deviance / model_int_glm$null.deviance)
pseudo_r2
# Exponentiate the coefficients of the offset Poisson model (log link) to put
# them on the rate scale: the zone terms become incidence rate ratios against
# the reference zone, and the intercept becomes the reference zone's rate per
# unit of population.
irr_zone <- exp(coef(model_glm))
irr_zone
# Conclusion
# The linear regression models show that population alone is not a strong
# predictor of incidents, but adding zone improves the model fit. Because
# incidents are count data, Poisson regression is more appropriate. The
# offset(log(population)) term is important because it adjusts for
# differences in population size across areas, allowing us to model incident
# rates instead of raw counts. The Poisson model with interaction provides a
# stronger fit, and its deviance-based pseudo-R^2
# (1 - residual deviance / null deviance) is reported above.