# Show the working directory; relative paths such as "incidents.csv" resolve against it.
getwd()
[1] "/cloud/project"
# Load the incidents data set from the working directory.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change how the header row is handled.
regression1 <- read.csv("incidents.csv", header = TRUE, sep = ",")
# Inspect the column types before modelling.
str(regression1)
'data.frame':   16 obs. of  4 variables:
 $ area      : chr  "Boulder" "California-lexington" "Huntsville" "Seattle" ...
 $ zone      : chr  "west" "east" "east" "west" ...
 $ population: chr  "107,353" "326,534" "444,752" "750,000" ...
 $ incidents : int  605 103 161 1703 1003 527 721 704 105 403 ...
# Per-column summary of the raw data as loaded.
summary(regression1)
     area               zone            population          incidents     
 Length:16          Length:16          Length:16          Min.   : 103.0  
 Class :character   Class :character   Class :character   1st Qu.: 277.8  
 Mode  :character   Mode  :character   Mode  :character   Median : 654.0  
                                                          Mean   : 695.2  
                                                          3rd Qu.: 853.0  
                                                          Max.   :2072.0  
# Make sure the packages for this chapter are installed; install any that
# are missing.
# NOTE(review): the recorded install log reports 'maptools' as not available
# for this version of R, and none of the code visible here calls it --
# consider dropping it from the list.
pkg <- c("ggplot2", "scales", "maptools",
         "sp", "maps", "grid", "car")
# Compare against the installed package *names* only. Matching against the
# whole installed.packages() matrix also matches every other cell (for
# example a Depends/Imports/Suggests field that is exactly "sp"), which can
# wrongly report a missing package as installed.
new.pkg <- pkg[!(pkg %in% rownames(installed.packages()))]
if (length(new.pkg) > 0) {
  install.packages(new.pkg)
}
Warning in install.packages :
  package ‘maptools’ is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/xts_0.14.1.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/TTR_0.24.4.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/quantmod_0.4.28.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/lmtest_0.9-40.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/tseries_0.10-58.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/tibble_3.3.0.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/tidyr_1.3.2.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/pillar_1.11.1.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/tidyselect_1.2.1.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/cowplot_1.2.0.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/forecast_8.24.0.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/modelr_0.1.11.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/broom_1.0.11.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/dplyr_1.1.4.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/doBy_4.7.1.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/reformulas_0.4.3.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/Rdpack_2.6.4.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/RcppEigen_0.3.4.0.2.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/gtable_0.3.6.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/isoband_0.3.0.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/S7_0.2.1.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/withr_3.0.2.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/farver_2.1.2.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/labeling_0.4.3.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/RColorBrewer_1.1-3.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/viridisLite_0.4.2.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/carData_3.0-5.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/abind_1.4-8.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/Formula_1.2-5.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/pbkrtest_0.5.5.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/quantreg_6.1.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/lme4_1.1-38.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/ggplot2_4.0.1.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/scales_1.4.0.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/sp_2.2-0.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/maps_3.4.3.tar.gz'
trying URL 'http://rspm/default/__linux__/focal/latest/src/contrib/car_3.1-3.tar.gz'
Warning in install.packages :
  installation of package ‘RcppEigen’ had non-zero exit status
Warning in install.packages :
  installation of package ‘lme4’ had non-zero exit status

The downloaded source packages are in
    ‘/tmp/RtmpEKPcRW/downloaded_packages’
# Remove the thousands separators, then convert the population text to numbers.
regression1[["population"]] <- as.numeric(
  gsub(",", "", regression1[["population"]], fixed = TRUE)
)
# Print the converted column.
regression1[["population"]]
 [1]  107353  326534  444752  750000   64403 2744878 1600000 2333000 1572816  712091 6900000 2700000 4900000 4200000 5200000 7100000
# Keep every column except the first one (area).
regression2 <- regression1[-1]
head(regression2)
# Model 1: simple linear regression of incidents on population.
reg.fit1 <- lm(formula = regression2$incidents ~ regression2$population)
summary(reg.fit1)

Call:
lm(formula = regression2$incidents ~ regression2$population)

Residuals:
   Min     1Q Median     3Q    Max 
-684.5 -363.5 -156.2  133.9 1164.7 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)  
(Intercept)            4.749e+02  2.018e+02   2.353   0.0337 *
regression2$population 8.462e-05  5.804e-05   1.458   0.1669  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 534.9 on 14 degrees of freedom
Multiple R-squared:  0.1318,    Adjusted R-squared:  0.0698 
F-statistic: 2.126 on 1 and 14 DF,  p-value: 0.1669

Based on the output obtained above, please answer the following question: Is Population significant at a 5% significance level? What is the adjusted-R squared of the model?

Ans: Population is not a statistically significant predictor of incidents at the 5% significance level (p = 0.1669). The model has a low adjusted R-squared of 0.0698, indicating limited explanatory power.

# Model 2: incidents explained by zone and population (additive, no interaction).
# NOTE(review): the recorded output labels the coefficient `zone`, which is how
# lm() names a numeric predictor; a character zone column would be reported as
# `zonewest`. The 0/1 recode of zone appears to have been run before this fit --
# confirm the intended chunk order.
reg.fit2 <- lm(formula = incidents ~ zone + population, data = regression2)
summary(reg.fit2)

Call:
lm(formula = incidents ~ zone + population, data = regression2)

Residuals:
    Min      1Q  Median      3Q     Max 
-537.21 -273.14  -57.89  188.17  766.03 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept) 1.612e+02  1.675e+02   0.962  0.35363   
zone        7.266e+02  1.938e+02   3.749  0.00243 **
population  6.557e-05  4.206e-05   1.559  0.14300   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 384.8 on 13 degrees of freedom
Multiple R-squared:  0.5828,    Adjusted R-squared:  0.5186 
F-statistic: 9.081 on 2 and 13 DF,  p-value: 0.003404

Based on the output obtained above, please answer the following question: Are Population and/or Zone significant at a 5% significance level? What is the adjusted-R squared of the model?

Ans: At the 5% significance level, Zone is a statistically significant predictor of incidents (p = 0.00243 < 0.05), while Population is not (p = 0.14300 > 0.05). The adjusted R-squared of the model is 0.5186, indicating that approximately 52% of the variation in incidents is explained by the model.

# Recode zone as a 0/1 dummy (west = 1, anything else = 0).
# Guarded so that re-running this chunk is harmless: once zone is numeric,
# `zone == "west"` is FALSE for every row, and an unguarded recode would
# silently reset the whole column to 0.
if (!is.numeric(regression2$zone)) {
  regression2$zone <- ifelse(regression2$zone == "west", 1, 0)
}
# Interaction term: equals population where zone is 1 and 0 where zone is 0.
# NOTE: this name shadows base::interaction(); it is kept because the models
# refer to the term by this name.
interaction <- regression2$zone * regression2$population
# Model 3: both main effects plus the zone-by-population interaction.
reg.fit3 <- lm(
  regression2$incidents ~ interaction + regression2$population +
    regression2$zone
)
summary(reg.fit3)

Call:
lm(formula = regression2$incidents ~ interaction + regression2$population + 
    regression2$zone)

Residuals:
    Min      1Q  Median      3Q     Max 
-540.91 -270.93  -59.56  187.99  767.99 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)  
(Intercept)            1.659e+02  2.313e+02   0.717   0.4869  
interaction            2.974e-06  9.469e-05   0.031   0.9755  
regression2$population 6.352e-05  7.868e-05   0.807   0.4352  
regression2$zone       7.192e+02  3.108e+02   2.314   0.0392 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 400.5 on 12 degrees of freedom
Multiple R-squared:  0.5829,    Adjusted R-squared:  0.4786 
F-statistic: 5.589 on 3 and 12 DF,  p-value: 0.01237

Based on the output obtained above, please answer the following question:

Is Population significant at a 5% significance level? Ans: No. Population is not significant because its p-value (0.4352) is greater than 0.05.

Is Zone significant at a 5% significance level? Ans: Yes. Zone is a statistically significant predictor of incidents because its p-value (0.0392) is less than 0.05.

Is the interaction term significant at a 5% significance level? What is the adjusted-R squared of the model? Ans: The interaction term is not statistically significant because its p-value (0.9755) is greater than 0.05. The adjusted R-squared of the model is 0.4786, indicating that approximately 48% of the variation in incidents is explained by the model.

# Model 4: incidents regressed on the interaction term alone (no main effects).
reg.fit4 <- lm(formula = regression2$incidents ~ interaction)
summary(reg.fit4)

Call:
lm(formula = regression2$incidents ~ interaction)

Residuals:
    Min      1Q  Median      3Q     Max 
-650.28 -301.09  -83.71  123.23 1103.76 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept) 4.951e+02  1.320e+02   3.751  0.00215 **
interaction 1.389e-04  4.737e-05   2.932  0.01093 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 451.9 on 14 degrees of freedom
Multiple R-squared:  0.3804,    Adjusted R-squared:  0.3361 
F-statistic: 8.595 on 1 and 14 DF,  p-value: 0.01093

Is the interaction term significant at a 5% significance level? What is the adjusted-R squared of the model? Ans: Yes, it is significant at the 5% level (p = 0.01093). The adjusted R-squared of the model is 0.3361, indicating that approximately 34% of the variation in incidents is explained by the model.

Which of the models run above would you choose to make predictions? Why?? Ans: The model reg.fit2 is preferred for prediction because it has the highest adjusted R-squared (0.5186), indicating the strongest explanatory power. It provides the best trade-off between explanatory power and simplicity, maximizing adjusted R-squared while avoiding unnecessary interaction terms.

LS0tCnRpdGxlOiAiSGFuZG91dF9SZWdfMTEiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyfQpnZXR3ZCgpCmBgYAoKYGBge3J9CnJlZ3Jlc3Npb24xIDwtIHJlYWQuY3N2KCJpbmNpZGVudHMuY3N2IixoZWFkZXI9VCwgc2VwPSIsIikKc3RyKHJlZ3Jlc3Npb24xKQpzdW1tYXJ5KHJlZ3Jlc3Npb24xKQpgYGAKCmBgYHtyfQojIG1ha2Ugc3VyZSB0aGUgcGFja2FnZXMgZm9yIHRoaXMgY2hhcHRlcgojIGFyZSBpbnN0YWxsZWQsIGluc3RhbGwgaWYgbmVjZXNzYXJ5CnBrZyA8LSBjKCJnZ3Bsb3QyIiwgInNjYWxlcyIsICJtYXB0b29scyIsCiAgICAgICAgICAgICAgInNwIiwgIm1hcHMiLCAiZ3JpZCIsICJjYXIiICkKbmV3LnBrZyA8LSBwa2dbIShwa2cgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoKSldCmlmIChsZW5ndGgobmV3LnBrZykpIHsKICBpbnN0YWxsLnBhY2thZ2VzKG5ldy5wa2cpICB9CmBgYAoKYGBge3J9CnJlZ3Jlc3Npb24xJHBvcHVsYXRpb24gPC0gYXMubnVtZXJpYyhnc3ViKCIsIiwiIixyZWdyZXNzaW9uMSRwb3B1bGF0aW9uKSkKcmVncmVzc2lvbjEkcG9wdWxhdGlvbgpgYGAKCmBgYHtyfQpyZWdyZXNzaW9uMjwtcmVncmVzc2lvbjFbLC0xXQpgYGAKCgpgYGB7cn0KaGVhZChyZWdyZXNzaW9uMikKYGBgCgpgYGB7cn0KcmVnLmZpdDE8LWxtKHJlZ3Jlc3Npb24yJGluY2lkZW50cyB+IHJlZ3Jlc3Npb24yJHBvcHVsYXRpb24pCnN1bW1hcnkocmVnLmZpdDEpCmBgYApCYXNlZCBvbiB0aGUgb3V0cHV0IG9idGFpbmVkIGFib3ZlLCBwbGVhc2UgYW5zd2VyIHRoZSBmb2xsb3dpbmcgcXVlc3Rpb246CklzIFBvcHVsYXRpb24gc2lnbmlmaWNhbnQgYXQgYSA1JSBzaWduaWZpY2FuY2UgbGV2ZWw/IFdoYXQgaXMgdGhlIGFkanVzdGVkLVIgc3F1YXJlZCBvZiB0aGUgbW9kZWw/CgpBbnM6IFBvcHVsYXRpb24gaXMgbm90IGEgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudCBwcmVkaWN0b3Igb2YgaW5jaWRlbnRzIGF0IHRoZSA1JSBzaWduaWZpY2FuY2UgbGV2ZWwgKHAgPSAwLjE2NjkpLiAKVGhlIG1vZGVsIGhhcyBhIGxvdyBhZGp1c3RlZCBSLXNxdWFyZWQgb2YgMC4wNjk4LCBpbmRpY2F0aW5nIGxpbWl0ZWQgZXhwbGFuYXRvcnkgcG93ZXIuCgoKYGBge3J9CnJlZy5maXQyPC1sbShpbmNpZGVudHMgfiB6b25lK3BvcHVsYXRpb24sIGRhdGEgPSByZWdyZXNzaW9uMikKc3VtbWFyeShyZWcuZml0MikKYGBgCkJhc2VkIG9uIHRoZSBvdXRwdXQgb2J0YWluZWQgYWJvdmUsIHBsZWFzZSBhbnN3ZXIgdGhlIGZvbGxvd2luZyBxdWVzdGlvbjoKQXJlIFBvcHVsYXRpb24gYW5kL29yIFpvbmUgIHNpZ25pZmljYW50IGF0IGEgNSUgc2lnbmlmaWNhbmNlIGxldmVsPyBXaGF0IGlzIHRoZSBhZGp1c3RlZC1SIHNxdWFyZWQgb2YgdGhlIG1vZGVsPwoKQXQgdGhlIDUlIHNpZ25pZmljYW5jZSBsZXZlbCwgWm9uZSBpcyBhIHN0YXRpc3RpY2FsbHkgc2lnbmlmaWNhbnQgcHJlZGljdG9yIG9mIGluY2lkZW50cywgd2hpbGUgUG9wdWxh
dGlvbiBpcyBub3QuIFRoZSBhZGp1c3RlZCBSLXNxdWFyZWQgb2YgdGhlIG1vZGVsIGlzIDAuNTE4NiwgaW5kaWNhdGluZyB0aGF0IGFwcHJveGltYXRlbHkgNTIlIG9mIHRoZSB2YXJpYXRpb24gaW4gaW5jaWRlbnRzIGlzIGV4cGxhaW5lZCBieSB0aGUgbW9kZWwuCgoKYGBge3J9CnJlZ3Jlc3Npb24yJHpvbmUgPC0gaWZlbHNlKHJlZ3Jlc3Npb24yJHpvbmUgPT0gIndlc3QiLCAxLCAwKQpgYGAKCmBgYHtyfQppbnRlcmFjdGlvbjwtcmVncmVzc2lvbjIkem9uZSpyZWdyZXNzaW9uMiRwb3B1bGF0aW9uCmBgYAoKYGBge3J9CnJlZy5maXQzPC1sbShyZWdyZXNzaW9uMiRpbmNpZGVudHN+aW50ZXJhY3Rpb24rcmVncmVzc2lvbjIkcG9wdWxhdGlvbityZWdyZXNzaW9uMiR6b25lKQpzdW1tYXJ5KHJlZy5maXQzKQpgYGAKCkJhc2VkIG9uIHRoZSBvdXRwdXQgb2J0YWluZWQgYWJvdmUsIHBsZWFzZSBhbnN3ZXIgdGhlIGZvbGxvd2luZyBxdWVzdGlvbjoKCklzIFBvcHVsYXRpb24gc2lnbmlmaWNhbnQgYXQgYSA1JSBzaWduaWZpY2FuY2UgbGV2ZWw/IApBbnM6IEl0IG5vdCBOb3Qgc2lnbmlmaWNhbnQgY2F1c2UgKDAuNDM1MiA+IDAuMDUpCgpJcyBab25lIHNpZ25pZmljYW50IGF0IGEgNSUgc2lnbmlmaWNhbmNlIGxldmVsPwpBbnM6IFpvbmUgaXMgYSBzdGF0aXN0aWNhbGx5IHNpZ25pZmljYW50IHByZWRpY3RvciBvZiBpbmNpZGVudHMKCklzIHRoZSBpbnRlcmFjdGlvbiB0ZXJtIHNpZ25pZmljYW50IGF0IGEgNSUgc2lnbmlmaWNhbmNlIGxldmVsPyBXaGF0IGlzIHRoZSBhZGp1c3RlZC1SIHNxdWFyZWQgb2YgdGhlIG1vZGVsPwpBbnM6IHRoZSBpbnRlcmFjdGlvbiB0ZXJtIGFyZSBub3Qgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudCBjYXVzZSAoMC45NzU1PDAuMDUpLiBUaGUgYWRqdXN0ZWQgUi1zcXVhcmVkIG9mIHRoZSBtb2RlbCBpcyAwLjQ3ODYsIGluZGljYXRpbmcgdGhhdCBhcHByb3hpbWF0ZWx5IDQ4JSBvZiB0aGUgdmFyaWF0aW9uIGluIGluY2lkZW50cyBpcyBleHBsYWluZWQgYnkgdGhlIG1vZGVsLgoKCmBgYHtyfQpyZWcuZml0NDwtbG0ocmVncmVzc2lvbjIkaW5jaWRlbnRzfmludGVyYWN0aW9uKQpzdW1tYXJ5KHJlZy5maXQ0KQpgYGAKSXMgdGhlIGludGVyYWN0aW9uIHRlcm0gc2lnbmlmaWNhbnQgYXQgYSA1JSBzaWduaWZpY2FuY2UgbGV2ZWw/IFdoYXQgaXMgdGhlIGFkanVzdGVkLVIgc3F1YXJlZCBvZiB0aGUgbW9kZWw/CkFuczogWWVzLCBpdCBpcyBzaWduaWZpY2FudCBhdCB0aGUgNSUgbGV2ZWwgIChwID0gMC4wMTA5MykuIFRoZSBhZGp1c3RlZCBSLXNxdWFyZWQgb2YgdGhlIG1vZGVsIGlzIDAuMzM2MSwgaW5kaWNhdGluZyB0aGF0IGFwcHJveGltYXRlbHkgMzQlIG9mIHRoZSB2YXJpYXRpb24gaW4gaW5jaWRlbnRzIGlzIGV4cGxhaW5lZCBieSB0aGUgbW9kZWwuCgpXaGljaCBvZiB0aGUgbW9kZWxzIHJ1biBhYm92ZSB3b3VsZCB5b3UgY2hvb3NlIHRvIG1ha2UgcHJlZGljdGlvbnM/IFdoeT8/CkFuczogVGhl
IG1vZGVsIHJlZy5maXQyIGlzIHByZWZlcnJlZCBmb3IgcHJlZGljdGlvbiBiZWNhdXNlIGl0IGhhcyB0aGUgaGlnaGVzdCBhZGp1c3RlZCBSLXNxdWFyZWQgKDAuNTE4NiksIGluZGljYXRpbmcgdGhlIHN0cm9uZ2VzdCBleHBsYW5hdG9yeSBwb3dlci4gSXQgcHJvdmlkZXMgdGhlIGJlc3QgdHJhZGUtb2ZmIGJldHdlZW4gZXhwbGFuYXRvcnkgcG93ZXIgYW5kIHNpbXBsaWNpdHksIG1heGltaXppbmcgYWRqdXN0ZWQgUi1zcXVhcmVkIHdoaWxlIGF2b2lkaW5nIHVubmVjZXNzYXJ5IGludGVyYWN0aW9uIHRlcm1zLgoKCgoK