# Try both possible filenames

# Locate the incidents data file in the current working directory.
# The two accepted names are checked in order of preference; the first one
# that exists is stored in `file_name`. The script stops with an error when
# neither file is present.
if (file.exists("incidents.csv")) {
  file_name <- "incidents.csv"
} else if (file.exists("incidents (5).csv")) {
  file_name <- "incidents (5).csv"
} else {
  # stop() concatenates its arguments without a separator, so the message
  # text is a single sentence pair.
  stop(
    "File not found: incidents.csv or incidents (5).csv. ",
    "Please upload/rename your file."
  )
}

# Read the incidents data (comma-separated, first row holds the column names)
# into `regression1`, then print its structure and summary statistics.
regression1 <- read.csv(file_name, header = TRUE, sep = ",")
str(regression1)
summary(regression1)

# Convert population to numeric (remove commas)

# Strip thousands separators (e.g. "1,234" -> "1234") and convert the
# population column to numeric. `fixed = TRUE` treats the comma as a literal
# string rather than a regular expression. Values that still cannot be parsed
# become NA (as.numeric() emits a coercion warning in that case).
regression1$population <- as.numeric(
  gsub(",", "", regression1$population, fixed = TRUE)
)

# Keep only zone, population, incidents

# Build the modelling data frame from the three columns used below, then
# print its first rows and structure.
regression2 <- regression1[, c("zone", "population", "incidents")]
head(regression2)
str(regression2)

# Linear model 1: incidents explained by population only.
reg.fit1 <- lm(incidents ~ population, data = regression2)
summary(reg.fit1)

# Linear model 2: adds zone as an additive predictor.
reg.fit2 <- lm(incidents ~ zone + population, data = regression2)
summary(reg.fit2)

# Linear model 2 with interaction: `zone * population` expands to both main
# effects plus the zone:population interaction term.
reg.fit2_int <- lm(incidents ~ zone * population, data = regression2)
summary(reg.fit2_int)

# Convert zone to 0/1 (west = 1, east = 0)

# Indicator column: 1 where zone is "west", 0 for every other zone value.
regression2$zone_bin <- ifelse(regression2$zone == "west", 1, 0)

# Hand-built interaction term (indicator x population).
# NOTE: this is a free-standing vector, not a column of `regression2`; lm()
# finds it through the formula's environment because `data` has no column of
# that name. It shares its name with base::interaction().
interaction <- regression2$zone_bin * regression2$population

# Linear model 3: manual interaction term plus both main effects.
reg.fit3 <- lm(
  incidents ~ interaction + population + zone_bin,
  data = regression2
)
summary(reg.fit3)

# Linear model 4: interaction term only (no main effects).
reg.fit4 <- lm(incidents ~ interaction, data = regression2)
summary(reg.fit4)

# Poisson rate model: incidents by zone with log(population) as an offset, so
# the coefficients describe incidents per unit of population.
model_glm <- glm(
  incidents ~ zone + offset(log(population)),
  data = regression2,
  family = poisson(link = "log")
)
summary(model_glm)

# Poisson count model: incidents by zone with no offset, i.e. raw counts
# without adjusting for population size.
model_simple_glm <- glm(
  incidents ~ zone,
  data = regression2,
  family = poisson(link = "log")
)
summary(model_simple_glm)

# Poisson rate model with interaction: zone, population and their interaction
# as predictors, again with the log(population) offset. Population therefore
# enters both as a covariate and as the exposure term.
model_int_glm <- glm(
  incidents ~ zone * population + offset(log(population)),
  data = regression2,
  family = poisson(link = "log")
)
summary(model_int_glm)

# Deviance-based pseudo-R2 for the interaction Poisson model:
# 1 - (residual deviance / null deviance).
# NOTE(review): McFadden's pseudo-R2 is defined on log-likelihoods
# (1 - logLik(model) / logLik(null)) and generally differs from this
# deviance ratio for Poisson models - confirm which one is intended.
pseudo_r2 <- 1 - (model_int_glm$deviance / model_int_glm$null.deviance)
pseudo_r2

# Incidence Rate Ratio for zone (from the offset model)

# Exponentiate every coefficient of the offset Poisson model.
# The zone term(s) are incidence rate ratios relative to the reference zone;
# the exponentiated intercept is the reference zone's rate per unit of
# population, not a ratio.
irr_zone <- exp(coef(model_glm))
irr_zone

# Conclusion
# The linear regression models show that population alone is not a strong
# predictor of incidents, but adding zone improves the model fit. Because
# incidents are count data, Poisson regression is more appropriate. The
# offset(log(population)) term is important because it adjusts for differences
# in population size across areas, allowing us to model incident rates instead
# of raw counts. The Poisson model with interaction provides a stronger fit,
# and its deviance-based pseudo-R2 (1 - residual deviance / null deviance, a
# McFadden-style measure) is reported above.