getwd()
[1] "/cloud/project"
regression1<-read.csv("incidents.csv",header=T,sep = ",")
str(regression1)
'data.frame': 16 obs. of 4 variables:
$ area : chr "Boulder" "California-lexington" "Huntsville" "Seattle" ...
$ zone : chr "west" "east" "east" "west" ...
$ population: chr "107,353" "326,534" "444,752" "750,000" ...
$ incidents : int 605 103 161 1703 1003 527 721 704 105 403 ...
summary(regression1)
area zone population incidents
Length:16 Length:16 Length:16 Min. : 103.0
Class :character Class :character Class :character 1st Qu.: 277.8
Mode :character Mode :character Mode :character Median : 654.0
Mean : 695.2
3rd Qu.: 853.0
Max. :2072.0
pkg <- c("ggplot2", "scales", "maptools",
"sp", "maps", "grid", "car" )
new.pkg <- pkg[!(pkg %in% installed.packages())]
if (length(new.pkg)) {
install.packages(new.pkg)
}
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/pkgconfig_2.0.3.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/utf8_1.2.6.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/colorspace_2.1-2.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/fracdiff_1.5-3.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/lmtest_0.9-40.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/timeDate_4052.112.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/urca_1.3-4.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/zoo_1.8-15.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/RcppArmadillo_15.2.3-1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/backports_1.5.0.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/generics_0.1.4.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/purrr_1.2.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/tibble_3.3.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/tidyr_1.3.2.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/lifecycle_1.0.5.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/pillar_1.11.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/rlang_1.1.7.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/tidyselect_1.2.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/vctrs_0.7.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/cowplot_1.2.0.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/Deriv_4.2.0.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/forecast_9.0.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/modelr_0.1.11.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/microbenchmark_1.5.0.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/rbibutils_2.4.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/cpp11_0.5.3.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/broom_1.0.12.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/dplyr_1.2.0.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/numDeriv_2016.8-1.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/doBy_4.7.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/SparseM_1.84-2.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/MatrixModels_0.5-4.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/minqa_1.2.8.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/nloptr_2.2.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/reformulas_0.4.4.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/Rdpack_2.6.6.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/RcppEigen_0.3.4.0.2.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/gtable_0.3.6.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/isoband_0.3.0.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/S7_0.2.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/withr_3.0.2.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/farver_2.1.2.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/labeling_0.4.3.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/RColorBrewer_1.1-3.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/viridisLite_0.4.3.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/carData_3.0-6.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/abind_1.4-8.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/Formula_1.2-5.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/pbkrtest_0.5.5.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/quantreg_6.1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/lme4_1.1-38.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/ggplot2_4.0.2.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/scales_1.4.0.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/sp_2.2-1.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/maps_3.4.3.tar.gz'
trying URL 'http://rspm/default/__linux__/noble/latest/src/contrib/car_3.1-5.tar.gz'
The downloaded source packages are in
‘/tmp/RtmpGSLznQ/downloaded_packages’
Fit a Simple Linear Regression (SLR) to see if population size alone
predicts incident counts.
reg.fit1<-lm(regression2$incidents~regression2$population)
summary(reg.fit1)
Call:
lm(formula = regression2$incidents ~ regression2$population)
Residuals:
Min 1Q Median 3Q Max
-684.5 -363.5 -156.2 133.9 1164.7
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.749e+02 2.018e+02 2.353 0.0337 *
regression2$population 8.462e-05 5.804e-05 1.458 0.1669
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 534.9 on 14 degrees of freedom
Multiple R-squared: 0.1318, Adjusted R-squared: 0.0698
F-statistic: 2.126 on 1 and 14 DF, p-value: 0.1669
Add ‘zone’ as a categorical predictor to see if geography explains
the variance better than numbers alone
reg.fit2<-lm(incidents~zone+population,data=regression2)
summary(reg.fit2)
Call:
lm(formula = incidents ~ zone + population, data = regression2)
Residuals:
Min 1Q Median 3Q Max
-537.21 -273.14 -57.89 188.17 766.03
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.612e+02 1.675e+02 0.962 0.35363
zone 7.266e+02 1.938e+02 3.749 0.00243 **
population 6.557e-05 4.206e-05 1.559 0.14300
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 384.8 on 13 degrees of freedom
Multiple R-squared: 0.5828, Adjusted R-squared: 0.5186
F-statistic: 9.081 on 2 and 13 DF, p-value: 0.003404
regression2$zone<-ifelse(regression2$zone=="west",1,0)
# Create an interaction term to test if the effect of population on incidents changes depending on the zone.
interaction<-regression2$zone*regression2$population
reg.fit3<-lm(regression2$incidents~interaction+regression2$population+regression2$zone)
summary(reg.fit3)
Call:
lm(formula = regression2$incidents ~ interaction + regression2$population +
regression2$zone)
Residuals:
Min 1Q Median 3Q Max
-684.5 -363.5 -156.2 133.9 1164.7
Coefficients: (2 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.749e+02 2.018e+02 2.353 0.0337 *
interaction NA NA NA NA
regression2$population 8.462e-05 5.804e-05 1.458 0.1669
regression2$zone NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 534.9 on 14 degrees of freedom
Multiple R-squared: 0.1318, Adjusted R-squared: 0.0698
F-statistic: 2.126 on 1 and 14 DF, p-value: 0.1669
reg.fit4<-lm(regression2$incidents~interaction)
summary(reg.fit4)
Call:
lm(formula = regression2$incidents ~ interaction)
Residuals:
Min 1Q Median 3Q Max
-592.19 -417.44 -41.19 157.81 1376.81
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 695.2 138.7 5.014 0.000154 ***
interaction NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 554.6 on 15 degrees of freedom
model_glm <- glm(incidents ~ zone + offset(log(population)),
data = regression2,
family = poisson(link = "log"))
summary(model_glm)
Call:
glm(formula = incidents ~ zone + offset(log(population)), family = poisson(link = "log"),
data = regression2)
Coefficients: (1 not defined because of singularities)
Estimate Std. Error z value Pr(>|z|)
(Intercept) -8.228182 0.009482 -867.8 <2e-16 ***
zone NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 16731 on 15 degrees of freedom
Residual deviance: 16731 on 15 degrees of freedom
AIC: 16861
Number of Fisher Scoring iterations: 6
model_simple_glm <- glm(incidents ~ zone, data = regression2, family = poisson)
summary(model_simple_glm)
Call:
glm(formula = incidents ~ zone, family = poisson, data = regression2)
Coefficients: (1 not defined because of singularities)
Estimate Std. Error z value Pr(>|z|)
(Intercept) 6.544182 0.009482 690.2 <2e-16 ***
zone NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 6200.4 on 15 degrees of freedom
Residual deviance: 6200.4 on 15 degrees of freedom
AIC: 6331.2
Number of Fisher Scoring iterations: 5
Use a Poisson GLM because incidents are ‘count data’ (non-negative
integers) which standard regression often misrepresents
model_int_glm <- glm(incidents ~ zone * population + offset(log(population)),
data = regression2,
family = poisson)
summary(model_int_glm)
Call:
glm(formula = incidents ~ zone * population + offset(log(population)),
family = poisson, data = regression2)
Coefficients: (2 not defined because of singularities)
Estimate Std. Error z value Pr(>|z|)
(Intercept) -7.009e+00 1.772e-02 -395.52 <2e-16 ***
zone NA NA NA NA
population -3.092e-07 4.607e-09 -67.12 <2e-16 ***
zone:population NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 16731 on 15 degrees of freedom
Residual deviance: 11968 on 14 degrees of freedom
AIC: 12100
Number of Fisher Scoring iterations: 6
pseudo_r2 <- 1 - (model_int_glm$deviance / model_int_glm$null.deviance)
print(pseudo_r2)
[1] 0.2846857
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQogCmBgYHtyfQpnZXR3ZCgpCmBgYAoKYGBge3J9CnJlZ3Jlc3Npb24xPC1yZWFkLmNzdigiaW5jaWRlbnRzLmNzdiIsaGVhZGVyPVQsc2VwID0gIiwiKQpzdHIocmVncmVzc2lvbjEpCmBgYAoKYGBge3J9CnN1bW1hcnkocmVncmVzc2lvbjEpCmBgYAoKYGBge3J9CnBrZyA8LSBjKCJnZ3Bsb3QyIiwgInNjYWxlcyIsICJtYXB0b29scyIsCiAgICAgICAgICAgICAgInNwIiwgIm1hcHMiLCAiZ3JpZCIsICJjYXIiICkKbmV3LnBrZyA8LSBwa2dbIShwa2cgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoKSldCmlmIChsZW5ndGgobmV3LnBrZykpIHsKICBpbnN0YWxsLnBhY2thZ2VzKG5ldy5wa2cpICAKfQpgYGAKCiMgUmVtb3ZlIGNvbW1hcyBhbmQgY29udmVydCB0aGUgJ3BvcHVsYXRpb24nIHN0cmluZyBpbnRvIGEgbnVtZXJpYyBmb3JtYXQgZm9yIGNhbGN1bGF0aW9uLgpgYGB7cn0KcmVncmVzc2lvbjEkcG9wdWxhdGlvbiA8LSBhcy5udW1lcmljKGdzdWIoIiwiLCIiLHJlZ3Jlc3Npb24xJHBvcHVsYXRpb24pKQpyZWdyZXNzaW9uMSRwb3B1bGF0aW9uCmBgYAoKYGBge3J9CnJlZ3Jlc3Npb24yPC1yZWdyZXNzaW9uMVssLTFdCmhlYWQocmVncmVzc2lvbjIpCmBgYAojIEZpdCBhIFNpbXBsZSBMaW5lYXIgUmVncmVzc2lvbiAoU0xSKSB0byBzZWUgaWYgcG9wdWxhdGlvbiBzaXplIGFsb25lIHByZWRpY3RzIGluY2lkZW50IGNvdW50cy4KYGBge3J9CnJlZy5maXQxPC1sbShyZWdyZXNzaW9uMiRpbmNpZGVudHN+cmVncmVzc2lvbjIkcG9wdWxhdGlvbikKc3VtbWFyeShyZWcuZml0MSkKYGBgCgojIEFkZCAnem9uZScgYXMgYSBjYXRlZ29yaWNhbCBwcmVkaWN0b3IgdG8gc2VlIGlmIGdlb2dyYXBoeSBleHBsYWlucyB0aGUgdmFyaWFuY2UgYmV0dGVyIHRoYW4gbnVtYmVycyBhbG9uZQpgYGB7cn0KcmVnLmZpdDI8LWxtKGluY2lkZW50c356b25lK3BvcHVsYXRpb24sZGF0YT1yZWdyZXNzaW9uMikKc3VtbWFyeShyZWcuZml0MikKYGBgCgpgYGB7cn0KcmVncmVzc2lvbjIkem9uZTwtaWZlbHNlKHJlZ3Jlc3Npb24yJHpvbmU9PSJ3ZXN0IiwxLDApCgojIENyZWF0ZSBhbiBpbnRlcmFjdGlvbiB0ZXJtIHRvIHRlc3QgaWYgdGhlIGVmZmVjdCBvZiBwb3B1bGF0aW9uIG9uIGluY2lkZW50cyBjaGFuZ2VzIGRlcGVuZGluZyBvbiB0aGUgem9uZS4KCmludGVyYWN0aW9uPC1yZWdyZXNzaW9uMiR6b25lKnJlZ3Jlc3Npb24yJHBvcHVsYXRpb24KcmVnLmZpdDM8LWxtKHJlZ3Jlc3Npb24yJGluY2lkZW50c35pbnRlcmFjdGlvbityZWdyZXNzaW9uMiRwb3B1bGF0aW9uK3JlZ3Jlc3Npb24yJHpvbmUpCnN1bW1hcnkocmVnLmZpdDMpCmBgYAoKYGBge3J9CnJlZy5maXQ0PC1sbShyZWdyZXNzaW9uMiRpbmNpZGVudHN+aW50ZXJhY3Rpb24pCnN1bW1hcnkocmVnLmZpdDQpCmBgYAoKYGBge3J9Cm1vZGVsX2dsbSA8LSBnbG0oaW5jaWRlbnRzIH4gem9uZSArIG9mZnNldChsb2cocG9wdWxhdGlvbikpLCAKICAgICAgICAgICAgICAgICBkYXRhID0gcmVncmVzc2lvbjIsIAogICAgICAgICAgICAgICAgIGZhbWlseSA9IHBvaXNzb24obGluayA9ICJsb2ciKSkKCnN1bW1hcnkobW9kZWxfZ2xtKQpgYGAKCmBgYHtyfQptb2RlbF9zaW1wbGVfZ2xtIDwtIGdsbShpbmNpZGVudHMgfiB6b25lLCBkYXRhID0gcmVncmVzc2lvbjIsIGZhbWlseSA9IHBvaXNzb24pCnN1bW1hcnkobW9kZWxfc2ltcGxlX2dsbSkKYGBgCgojIFVzZSBhIFBvaXNzb24gR0xNIGJlY2F1c2UgaW5jaWRlbnRzIGFyZSAnY291bnQgZGF0YScgKG5vbi1uZWdhdGl2ZSBpbnRlZ2Vycykgd2hpY2ggc3RhbmRhcmQgcmVncmVzc2lvbiBvZnRlbiBtaXNyZXByZXNlbnRzCgpgYGB7cn0KbW9kZWxfaW50X2dsbSA8LSBnbG0oaW5jaWRlbnRzIH4gem9uZSAqIHBvcHVsYXRpb24gKyBvZmZzZXQobG9nKHBvcHVsYXRpb24pKSwgCiAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSByZWdyZXNzaW9uMiwgCiAgICAgICAgICAgICAgICAgICAgIGZhbWlseSA9IHBvaXNzb24pCnN1bW1hcnkobW9kZWxfaW50X2dsbSkKYGBgCgpgYGB7cn0KcHNldWRvX3IyIDwtIDEgLSAobW9kZWxfaW50X2dsbSRkZXZpYW5jZSAvIG1vZGVsX2ludF9nbG0kbnVsbC5kZXZpYW5jZSkKcHJpbnQocHNldWRvX3IyKQpgYGAK