getwd()
[1] "/Users/gretacapelletti/Downloads"
# Make sure the packages for this chapter are installed; install any that
# are missing.
# NOTE(review): in the recorded run install.packages() reported that
# "maptools" is not available for the R version in use - confirm whether it
# is still needed.
pkg <- c("ggplot2", "scales", "maptools",
         "sp", "maps", "grid", "car")
# installed.packages() returns a character matrix with one row per package.
# Compare against its row names (the package names) so the check is not
# matched against the version/dependency cells as well.
new.pkg <- setdiff(pkg, rownames(installed.packages()))
if (length(new.pkg) > 0) {
  install.packages(new.pkg)
}
Warning in install.packages :
  package ‘maptools’ is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
# Read the incidents CSV; the first row holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
regression1 <- read.csv("incidents.csv", header = TRUE, sep = ",")
#View(regression1)
summary(regression1)
     area               zone            population          incidents     
 Length:16          Length:16          Length:16          Min.   : 103.0  
 Class :character   Class :character   Class :character   1st Qu.: 277.8  
 Mode  :character   Mode  :character   Mode  :character   Median : 654.0  
                                                          Mean   : 695.2  
                                                          3rd Qu.: 853.0  
                                                          Max.   :2072.0  
str(regression1)
'data.frame':   16 obs. of  4 variables:
 $ area      : chr  "Boulder" "California-lexington" "Huntsville" "Seattle" ...
 $ zone      : chr  "west" "east" "east" "west" ...
 $ population: chr  "107,353" "326,534" "444,752" "750,000" ...
 $ incidents : int  605 103 161 1703 1003 527 721 704 105 403 ...
# population is read as text with thousands separators (e.g. "107,353").
# Remove the commas and convert the column to numeric, then print it to check.
regression1[["population"]] <- as.numeric(
  gsub(",", "", regression1[["population"]], fixed = TRUE)
)
regression1[["population"]]
 [1]  107353  326534  444752  750000   64403 2744878 1600000 2333000 1572816  712091 6900000 2700000 4900000
[14] 4200000 5200000 7100000
str(regression1$population)
 num [1:16] 107353 326534 444752 750000 64403 ...
# New data frame without the first column (area); show its first rows.
regression2 <- regression1[, -1]
head(regression2)
# Simple linear regression of incidents on population.
# The variables are referenced by column name with data = regression1 (as in
# reg.fit2) rather than with regression1$... inside the formula, so the
# coefficient is labelled "population" and predict() can be given newdata.
reg.fit1 <- lm(incidents ~ population, data = regression1)
summary(reg.fit1)

Call:
lm(formula = regression1$incidents ~ regression1$population)

Residuals:
   Min     1Q Median     3Q    Max 
-684.5 -363.5 -156.2  133.9 1164.7 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)  
(Intercept)            4.749e+02  2.018e+02   2.353   0.0337 *
regression1$population 8.462e-05  5.804e-05   1.458   0.1669  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 534.9 on 14 degrees of freedom
Multiple R-squared:  0.1318,    Adjusted R-squared:  0.0698 
F-statistic: 2.126 on 1 and 14 DF,  p-value: 0.1669

Based on the output obtained above, please answer the following question:

Is Population significant at a 5% significance level? What is the adjusted-R squared of the model? To determine whether population is significant, I look at the p-value for population, which is 0.1669. Since this value is greater than the 5% significance level (0.05), population is not significant.

Adjusted R-squared of the model is 0.0698.

# Multiple regression of incidents on zone and population.
# zone is still a character column at this point, so lm() treats it as a
# categorical predictor (reported as "zonewest" in the summary).
reg.fit2 <- lm(incidents ~ zone + population, data = regression1)
summary(reg.fit2)

Call:
lm(formula = incidents ~ zone + population, data = regression1)

Residuals:
    Min      1Q  Median      3Q     Max 
-537.21 -273.14  -57.89  188.17  766.03 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept) 1.612e+02  1.675e+02   0.962  0.35363   
zonewest    7.266e+02  1.938e+02   3.749  0.00243 **
population  6.557e-05  4.206e-05   1.559  0.14300   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 384.8 on 13 degrees of freedom
Multiple R-squared:  0.5828,    Adjusted R-squared:  0.5186 
F-statistic: 9.081 on 2 and 13 DF,  p-value: 0.003404

Based on the output obtained above, please answer the following question:

Are Population and/or Zone significant at a 5% significance level? What is the adjusted-R squared of the model?

At the 5% significance level, Zone is significant, as its p-value is 0.00243, which is less than 0.05. Population, however, is not significant, as its p-value is 0.14300, which is greater than 0.05. The adjusted R-squared value of the model is 0.5186.

# Recode zone as a numeric 0/1 dummy variable: the comparison yields TRUE for
# "west" and FALSE otherwise, and as.numeric() turns that into 1 and 0.
# The output is a numeric column (1 = west, 0 = any other zone).
regression1$zone <- as.numeric(regression1$zone == "west")
#View(regression1)
str(regression1)
'data.frame':   16 obs. of  4 variables:
 $ area      : chr  "Boulder" "California-lexington" "Huntsville" "Seattle" ...
 $ zone      : num  1 0 0 1 1 0 1 1 0 0 ...
 $ population: num  107353 326534 444752 750000 64403 ...
 $ incidents : int  605 103 161 1703 1003 527 721 704 105 403 ...
#regression1$zone<-as.integer((regression1$zone),replace=TRUE) was not necessary
# Interaction term: element-wise product of the 0/1 zone dummy and population,
# i.e. the population for west rows and 0 for all other rows.
# NOTE(review): this variable shares its name with base::interaction(); the
# name is kept because the later single-predictor model refers to it.
interaction <- regression1$zone * regression1$population
# Regress incidents on the interaction term plus both main effects.
# population and zone are taken from regression1 via data =; interaction is
# not a column of the data frame, so lm() finds it in the calling environment.
reg.fit3 <- lm(incidents ~ interaction + population + zone, data = regression1)
summary(reg.fit3)

Call:
lm(formula = regression1$incidents ~ interaction + regression1$population + 
    regression1$zone)

Residuals:
    Min      1Q  Median      3Q     Max 
-540.91 -270.93  -59.56  187.99  767.99 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)  
(Intercept)            1.659e+02  2.313e+02   0.717   0.4869  
interaction            2.974e-06  9.469e-05   0.031   0.9755  
regression1$population 6.352e-05  7.868e-05   0.807   0.4352  
regression1$zone       7.192e+02  3.108e+02   2.314   0.0392 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 400.5 on 12 degrees of freedom
Multiple R-squared:  0.5829,    Adjusted R-squared:  0.4786 
F-statistic: 5.589 on 3 and 12 DF,  p-value: 0.01237

Based on the output obtained above, please answer the following question:

Is Population significant at a 5% significance level? Is Zone significant at a 5% significance level? Is the interaction term significant at a 5% significance level? What is the adjusted-R squared of the model?

At the 5% significance level, Population is not significant, as its p-value is 0.4352, which is greater than 0.05. Zone is significant, as its p-value is 0.0392, which is less than 0.05. The interaction term is not significant, as its p-value is 0.9755, which is greater than 0.05. The adjusted R-squared value of the model is 0.4786.

Let us now run a model where the only feature is the interaction term.

# Regression of incidents on the interaction term only.
# incidents comes from regression1 via data =; interaction is the
# zone * population vector created earlier and is not a column of the data
# frame, so lm() finds it in the calling environment.
reg.fit4 <- lm(incidents ~ interaction, data = regression1)
summary(reg.fit4)

Call:
lm(formula = regression1$incidents ~ interaction)

Residuals:
    Min      1Q  Median      3Q     Max 
-650.28 -301.09  -83.71  123.23 1103.76 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept) 4.951e+02  1.320e+02   3.751  0.00215 **
interaction 1.389e-04  4.737e-05   2.932  0.01093 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 451.9 on 14 degrees of freedom
Multiple R-squared:  0.3804,    Adjusted R-squared:  0.3361 
F-statistic: 8.595 on 1 and 14 DF,  p-value: 0.01093

Is the interaction term significant at a 5% significance level? What is the adjusted-R squared of the model? At the 5% significance level, the interaction term is significant, as its p-value is 0.01093, which is less than 0.05. The adjusted R-squared value of the model is 0.3361.

Which of the models run above would you choose to make predictions? Why? Model 2 (reg.fit2) would be the best choice for making predictions. This model includes both “zone” and “population” as predictors, with “zone” being statistically significant at the 5% significance level. It also has the highest adjusted R-squared value of the four models (0.5186, versus 0.0698, 0.4786, and 0.3361), suggesting that it provides the best fit to the data and will likely yield more reliable predictions.

LS0tCnRpdGxlOiAiQWN0aXZpdHkgMTEiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCgpgYGB7cn0KZ2V0d2QoKQpgYGAKYGBge3J9CiMgbWFrZSBzdXJlIHRoZSBwYWNrYWdlcyBmb3IgdGhpcyBjaGFwdGVyCiMgYXJlIGluc3RhbGxlZCwgaW5zdGFsbCBpZiBuZWNlc3NhcnkKcGtnIDwtIGMoImdncGxvdDIiLCAic2NhbGVzIiwgIm1hcHRvb2xzIiwKICAgICAgICAgICAgICAic3AiLCAibWFwcyIsICJncmlkIiwgImNhciIgKQpuZXcucGtnIDwtIHBrZ1shKHBrZyAlaW4lIGluc3RhbGxlZC5wYWNrYWdlcygpKV0KaWYgKGxlbmd0aChuZXcucGtnKSkgewogIGluc3RhbGwucGFja2FnZXMobmV3LnBrZykgIAp9CmBgYApgYGB7cn0KIyByZWFkIHRoZSBDU1Ygd2l0aCBoZWFkZXJzCnJlZ3Jlc3Npb24xPC1yZWFkLmNzdigiaW5jaWRlbnRzLmNzdiIsIGhlYWRlcj1ULHNlcCA9IiwiKQpgYGAKYGBge3J9CiNWaWV3KHJlZ3Jlc3Npb24xKQpgYGAKCmBgYHtyfQpzdW1tYXJ5KHJlZ3Jlc3Npb24xKQpgYGAKYGBge3J9CnN0cihyZWdyZXNzaW9uMSkKYGBgCmBgYHtyfQpyZWdyZXNzaW9uMSRwb3B1bGF0aW9uIDwtIGFzLm51bWVyaWMoZ3N1YigiLCIsIiIscmVncmVzc2lvbjEkcG9wdWxhdGlvbikpCnJlZ3Jlc3Npb24xJHBvcHVsYXRpb24KYGBgCmBgYHtyfQpzdHIocmVncmVzc2lvbjEkcG9wdWxhdGlvbikKYGBgCmBgYHtyfQpyZWdyZXNzaW9uMjwtcmVncmVzc2lvbjFbLC0xXSNuZXcgZGF0YSBmcmFtZSB3aXRoIHRoZSBkZWxldGlvbiBvZiBjb2x1bW4gMSAKYGBgCmBgYHtyfQpoZWFkKHJlZ3Jlc3Npb24yKQpgYGAKYGBge3J9CnJlZy5maXQxPC1sbShyZWdyZXNzaW9uMSRpbmNpZGVudHMgfiByZWdyZXNzaW9uMSRwb3B1bGF0aW9uKQpgYGAKYGBge3J9CnN1bW1hcnkocmVnLmZpdDEpCmBgYAoKQmFzZWQgb24gdGhlIG91dHB1dCBvYnRhaW5lZCBhYm92ZSwgcGxlYXNlIGFuc3dlciB0aGUgZm9sbG93aW5nIHF1ZXN0aW9uOgoKSXMgUG9wdWxhdGlvbiBzaWduaWZpY2FudCBhdCBhIDUlIHNpZ25pZmljYW5jZSBsZXZlbD8gV2hhdCBpcyB0aGUgYWRqdXN0ZWQtUiBzcXVhcmVkIG9mIHRoZSBtb2RlbD8KVG8gdW5kZXJzdGFuZCBpZiBwb3B1bGF0aW9uIGlzIHNpZ25pZmljYW50LCBJIGhhdmUgdG8gbG9vayBhdCB0aGUgcC12YWx1ZSBmb3IgcG9wdWxhdGlvbiwgd2hpY2ggaXMgMC4xNjY5LiBTaW5jZSB0aGlzIHZhbHVlIGlzIGdyZWF0ZXIgdGhhbiB0aGUgNSUgc2lnbmlmaWNhbmNlIGxldmVsICgwLjA1KSwgcG9wdWxhdGlvbiBpcyBub3Qgc2lnbmlmaWNhbnQuCgpBZGp1c3RlZCBSLXNxdWFyZWQgb2YgdGhlIG1vZGVsIGlzIDAuMDY5OC4KCmBgYHtyfQpyZWcuZml0MjwtbG0oaW5jaWRlbnRzIH4gem9uZStwb3B1bGF0aW9uLCBkYXRhID0gcmVncmVzc2lvbjEpCmBgYApgYGB7cn0Kc3VtbWFyeShyZWcuZml0MikKYGBgCkJhc2VkIG9uIHRoZSBvdXRwdXQgb2J0YWluZWQgYWJvdmUsIHBsZWFzZSBhbnN3ZXIgdGhlIGZvbGxvd2luZyBxdWVz
dGlvbjoKCkFyZSBQb3B1bGF0aW9uIGFuZC9vciBab25lICBzaWduaWZpY2FudCBhdCBhIDUlIHNpZ25pZmljYW5jZSBsZXZlbD8gV2hhdCBpcyB0aGUgYWRqdXN0ZWQtUiBzcXVhcmVkIG9mIHRoZSBtb2RlbD8KCkF0IHRoZSA1JSBzaWduaWZpY2FuY2UgbGV2ZWwsIFpvbmUgaXMgc2lnbmlmaWNhbnQsIGFzIGl0cyBwLXZhbHVlIGlzIDAuMDAyNDMsIHdoaWNoIGlzIGxlc3MgdGhhbiAwLjA1LgpQb3B1bGF0aW9uIHRobywgaXMgbm90IHNpZ25pZmljYW50LCBhcyBpdHMgcC12YWx1ZSBpcyAwLjE0MzAwLCB3aGljaCBpcyBncmVhdGVyIHRoYW4gMC4wNS4KVGhlIGFkanVzdGVkIFItc3F1YXJlZCB2YWx1ZSBvZiB0aGUgbW9kZWwgaXMgMC41MTg2LgpgYGB7cn0KcmVncmVzc2lvbjEkem9uZSA8LSBpZmVsc2UocmVncmVzc2lvbjEkem9uZSA9PSAid2VzdCIsIDEsIDApI1BsZWFzZSBleHBsYWluIHRoZSBzeW50YXggYW5kIHRoZSBvdXRwdXQKYGBgCmBgYHtyfQojVmlldyhyZWdyZXNzaW9uMSkKYGBgCmBgYHtyfQpzdHIocmVncmVzc2lvbjEpCmBgYApgYGB7cn0KI3JlZ3Jlc3Npb24xJHpvbmU8LWFzLmludGVnZXIoKHJlZ3Jlc3Npb24xJHpvbmUpLHJlcGxhY2U9VFJVRSkgd2FzIG5vdCBuZWNlc3NhcnkKYGBgCmBgYHtyfQppbnRlcmFjdGlvbjwtcmVncmVzc2lvbjEkem9uZSpyZWdyZXNzaW9uMSRwb3B1bGF0aW9uI0V4cGxhaW4gdGhlIHN5bnRheApgYGAKYGBge3J9CnJlZy5maXQzPC1sbShyZWdyZXNzaW9uMSRpbmNpZGVudHN+aW50ZXJhY3Rpb24rcmVncmVzc2lvbjEkcG9wdWxhdGlvbityZWdyZXNzaW9uMSR6b25lKQpgYGAKYGBge3J9CnN1bW1hcnkocmVnLmZpdDMpCmBgYAoKQmFzZWQgb24gdGhlIG91dHB1dCBvYnRhaW5lZCBhYm92ZSwgcGxlYXNlIGFuc3dlciB0aGUgZm9sbG93aW5nIHF1ZXN0aW9uOgoKSXMgUG9wdWxhdGlvbiBzaWduaWZpY2FudCBhdCBhIDUlIHNpZ25pZmljYW5jZSBsZXZlbD8gCklzIFpvbmUgc2lnbmlmaWNhbnQgYXQgYSA1JSBzaWduaWZpY2FuY2UgbGV2ZWw/CklzIHRoZSBpbnRlcmFjdGlvbiB0ZXJtIHNpZ25pZmljYW50IGF0IGEgNSUgc2lnbmlmaWNhbmNlIGxldmVsPyBXaGF0IGlzIHRoZSBhZGp1c3RlZC1SIHNxdWFyZWQgb2YgdGhlIG1vZGVsPwoKQXQgdGhlIDUlIHNpZ25pZmljYW5jZSBsZXZlbCwgUG9wdWxhdGlvbiBpcyBub3Qgc2lnbmlmaWNhbnQsIGFzIGl0cyBwLXZhbHVlIGlzIDAuNDM1Miwgd2hpY2ggaXMgZ3JlYXRlciB0aGFuIDAuMDUuIFpvbmUgaXMgc2lnbmlmaWNhbnQsIGFzIGl0cyBwLXZhbHVlIGlzIDAuMDM5Miwgd2hpY2ggaXMgbGVzcyB0aGFuIDAuMDUuClRoZSBpbnRlcmFjdGlvbiB0ZXJtIGlzIG5vdCBzaWduaWZpY2FudCwgYXMgaXRzIHAtdmFsdWUgaXMgMC45NzU1LCB3aGljaCBpcyBncmVhdGVyIHRoYW4gMC4wNS4KVGhlIGFkanVzdGVkIFItc3F1YXJlZCB2YWx1ZSBvZiB0aGUgbW9kZWwgaXMgMC40Nzg2LgoKCgoKTGV0IHVzIG5vdyBydW4gYSBtb2RlbCB3aGVyZSB0
aGUgb25seSBmZWF0dXJlIGlzIHRoZSBpbnRlcmFjdGlvbiB0ZXJtLiAKCgpgYGB7cn0KcmVnLmZpdDQ8LWxtKHJlZ3Jlc3Npb24xJGluY2lkZW50c35pbnRlcmFjdGlvbikKYGBgCmBgYHtyfQpzdW1tYXJ5KHJlZy5maXQ0KQpgYGAKSXMgdGhlIGludGVyYWN0aW9uIHRlcm0gc2lnbmlmaWNhbnQgYXQgYSA1JSBzaWduaWZpY2FuY2UgbGV2ZWw/IFdoYXQgaXMgdGhlIGFkanVzdGVkLVIgc3F1YXJlZCBvZiB0aGUgbW9kZWw/CkF0IHRoZSA1JSBzaWduaWZpY2FuY2UgbGV2ZWwsIHRoZSBpbnRlcmFjdGlvbiB0ZXJtIGlzIHNpZ25pZmljYW50LCBhcyBpdHMgcC12YWx1ZSBpcyAwLjAxMDkzLCB3aGljaCBpcyBsZXNzIHRoYW4gMC4wNS4KVGhlIGFkanVzdGVkIFItc3F1YXJlZCB2YWx1ZSBvZiB0aGUgbW9kZWwgaXMgMC4zMzYxLiAKCgpXaGljaCBvZiB0aGUgbW9kZWxzIHJ1biBhYm92ZSB3b3VsZCB5b3UgY2hvb3NlIHRvIG1ha2UgcHJlZGljdGlvbnM/IFdoeT8/Ck1vZGVsIDIgKHJlZy5maXQyKSB3b3VsZCBiZSB0aGUgYmVzdCBjaG9pY2UgZm9yIG1ha2luZyBwcmVkaWN0aW9ucy4gVGhpcyBtb2RlbCBpbmNsdWRlcyBib3RoICJ6b25lIiBhbmQgInBvcHVsYXRpb24iIGFzIHByZWRpY3RvcnMsIHdpdGggInpvbmUiIGJlaW5nIHN0YXRpc3RpY2FsbHkgc2lnbmlmaWNhbnQgYXQgdGhlIDUlIHNpZ25pZmljYW5jZSBsZXZlbC4gSXQgYWxzbyBoYXMgYSBoaWdoZXIgYWRqdXN0ZWQgUi1zcXVhcmVkIHZhbHVlICgwLjUxODYpIGNvbXBhcmVkIHRvIHRoZSBvdGhlciBtb2RlbHMsIHN1Z2dlc3RpbmcgdGhhdCBpdCBwcm92aWRlcyBhIGJldHRlciBmaXQgdG8gdGhlIGRhdGEgYW5kIHdpbGwgbGlrZWx5IHlpZWxkIG1vcmUgcmVsaWFibGUgcHJlZGljdGlvbnMuCgoKCg==