# Show the current working directory; the relative path "incidents.csv"
# passed to read.csv() is resolved against it.
getwd()
[1] "C:/Users/n0898873/Downloads"
# Make sure the packages for this chapter are installed; install any that
# are missing.
pkg <- c("ggplot2", "scales", "maptools",
         "sp", "maps", "grid", "car")
# Compare against the installed package names only (the row names of the
# matrix). Matching against the whole installed.packages() matrix would also
# match cells of other columns such as Depends/Imports, so a package that is
# merely mentioned by another package could be reported as installed.
new.pkg <- pkg[!(pkg %in% rownames(installed.packages()))]
if (length(new.pkg) > 0) {
  install.packages(new.pkg)
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/n0898873/AppData/Local/R/win-library/4.2’
(as ‘lib’ is unspecified)
Warning in install.packages :
  package ‘maptools’ is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
# Read the CSV file; the first row holds the column names.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
regression1 <- read.csv("incidents.csv", header = TRUE, sep = ",")

Importing the CSV file.

#View(regression1)

The View() call is commented out so that the notebook can be published.

# Per-column summary of the imported data; population is still stored as
# character at this point, so no numeric statistics are reported for it.
summary(regression1)
     area               zone            population          incidents     
 Length:16          Length:16          Length:16          Min.   : 103.0  
 Class :character   Class :character   Class :character   1st Qu.: 277.8  
 Mode  :character   Mode  :character   Mode  :character   Median : 654.0  
                                                          Mean   : 695.2  
                                                          3rd Qu.: 853.0  
                                                          Max.   :2072.0  

The data set has 16 observations.

# Inspect the structure and column types of the data frame.
str(regression1)
'data.frame':   16 obs. of  4 variables:
 $ area      : chr  "Boulder" "California-lexington" "Huntsville" "Seattle" ...
 $ zone      : chr  "west" "east" "east" "west" ...
 $ population: chr  "107,353" "326,534" "444,752" "750,000" ...
 $ incidents : int  605 103 161 1703 1003 527 721 704 105 403 ...
# Strip the thousands separators from population, convert the column to
# numeric, then print the converted values.
population_digits <- gsub(",", "", regression1[["population"]], fixed = TRUE)
regression1[["population"]] <- as.numeric(population_digits)
regression1[["population"]]
 [1]  107353  326534  444752  750000   64403 2744878 1600000 2333000 1572816  712091 6900000 2700000 4900000 4200000 5200000 7100000
# Check the type of the population column after the conversion.
str(regression1$population)
 num [1:16] 107353 326534 444752 750000 64403 ...

The population data is numeric now.

# New data frame: regression1 with column 1 removed.
regression2 <- regression1[, -1]

We are dropping column 1 (area) because we are not using it.

# Preview the first rows of regression2.
head(regression2)
# Simple linear regression of incidents on population.
# The columns are supplied through `data =` rather than written as
# `regression1$...` inside the formula, so the coefficient is labelled
# `population` and predict() can be called with `newdata`.
reg.fit1 <- lm(incidents ~ population, data = regression1)

Linear regression between incidents and population

# Print coefficient estimates, standard errors, p-values and R-squared
# for reg.fit1.
summary(reg.fit1)

Call:
lm(formula = regression1$incidents ~ regression1$population)

Residuals:
   Min     1Q Median     3Q    Max 
-684.5 -363.5 -156.2  133.9 1164.7 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)  
(Intercept)            4.749e+02  2.018e+02   2.353   0.0337 *
regression1$population 8.462e-05  5.804e-05   1.458   0.1669  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 534.9 on 14 degrees of freedom
Multiple R-squared:  0.1318,    Adjusted R-squared:  0.0698 
F-statistic: 2.126 on 1 and 14 DF,  p-value: 0.1669

What is the hypothesis?

The null hypothesis is that population does not affect incidents. We fail to reject the null because the p-value (0.1669) is not less than 0.05.

Is population significant at a 5% significance level? What is the adjusted R-squared of the model?

“Population” is not considered statistically significant at the 5% significance level based on the p-value (p = 0.1669). The adjusted R-squared is 0.0698, indicating that the model has limited explanatory power in predicting the response variable.

# Multiple regression: incidents explained by zone and population,
# with both predictors taken from regression1.
reg.fit2 <- lm(incidents ~ zone + population, data = regression1)
# Report the coefficient table and the overall fit statistics.
summary(reg.fit2)

Call:
lm(formula = incidents ~ zone + population, data = regression1)

Residuals:
    Min      1Q  Median      3Q     Max 
-537.21 -273.14  -57.89  188.17  766.03 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept) 1.612e+02  1.675e+02   0.962  0.35363   
zonewest    7.266e+02  1.938e+02   3.749  0.00243 **
population  6.557e-05  4.206e-05   1.559  0.14300   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 384.8 on 13 degrees of freedom
Multiple R-squared:  0.5828,    Adjusted R-squared:  0.5186 
F-statistic: 9.081 on 2 and 13 DF,  p-value: 0.003404

Zone is significant at the 5% significance level. The model is significant at the 5% significance level.

Are population and zone significant at the 5% level?

In summary, “zone” is statistically significant at the 5% level, while “population” is not. The model, as a whole, is statistically significant based on the F-statistic. Adjusted R-squared indicates moderate explanatory power.

# Recode zone in place as a numeric 0/1 indicator: 1 for "west", 0 otherwise.
regression1$zone <- as.numeric(regression1$zone == "west")

Define zone as 1 and 0, overwriting the existing column: if the zone is west it is 1, otherwise it is 0.

#View(regression1)

If you remove the #, View() will show zone coded as 1 and 0.

# Inspect the data frame again after zone was recoded.
str(regression1)
'data.frame':   16 obs. of  4 variables:
 $ area      : chr  "Boulder" "California-lexington" "Huntsville" "Seattle" ...
 $ zone      : num  1 0 0 1 1 0 1 1 0 0 ...
 $ population: num  107353 326534 444752 750000 64403 ...
 $ incidents : int  605 103 161 1703 1003 527 721 704 105 403 ...
# regression1$zone <- as.integer((regression1$zone), replace = TRUE) was not necessary
# Element-wise product of the zone and population columns.
interaction <- with(regression1, zone * population)

creating an interaction term between zone and population

# Regress incidents on zone, population and their interaction.
# `zone * population` expands to zone + population + zone:population, so the
# interaction is built from the columns of regression1 supplied via `data =`
# instead of from `regression1$...` references and a separate global vector.
# This keeps the coefficient labels short and lets predict() accept `newdata`.
reg.fit3 <- lm(incidents ~ zone * population, data = regression1)
# Report the coefficient table and the overall fit statistics.
summary(reg.fit3)

Call:
lm(formula = regression1$incidents ~ interaction + regression1$population + 
    regression1$zone)

Residuals:
    Min      1Q  Median      3Q     Max 
-540.91 -270.93  -59.56  187.99  767.99 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)  
(Intercept)            1.659e+02  2.313e+02   0.717   0.4869  
interaction            2.974e-06  9.469e-05   0.031   0.9755  
regression1$population 6.352e-05  7.868e-05   0.807   0.4352  
regression1$zone       7.192e+02  3.108e+02   2.314   0.0392 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 400.5 on 12 degrees of freedom
Multiple R-squared:  0.5829,    Adjusted R-squared:  0.4786 
F-statistic: 5.589 on 3 and 12 DF,  p-value: 0.01237

Based on the output obtained above, please answer the following question:

Is Population significant at a 5% significance level? Is Zone significant at a 5% significance level? Is the interaction term significant at a 5% significance level? What is the adjusted-R squared of the model?

Population is not statistically significant at the 5% level. Zone is statistically significant at the 5% level. The interaction term is not statistically significant at the 5% level. The model, as a whole, is statistically significant based on the F-statistic. The adjusted R-squared is 0.4786, indicating moderate explanatory power.

# Regress incidents on the zone-by-population product alone.
# I() makes the formula use the arithmetic product of the two columns, and
# `data =` takes both columns from regression1 rather than from a separate
# global vector, so predict() can accept `newdata`.
reg.fit4 <- lm(incidents ~ I(zone * population), data = regression1)
# Report the coefficient table and the overall fit statistics.
summary(reg.fit4)

Call:
lm(formula = regression1$incidents ~ interaction)

Residuals:
    Min      1Q  Median      3Q     Max 
-650.28 -301.09  -83.71  123.23 1103.76 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept) 4.951e+02  1.320e+02   3.751  0.00215 **
interaction 1.389e-04  4.737e-05   2.932  0.01093 * 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 451.9 on 14 degrees of freedom
Multiple R-squared:  0.3804,    Adjusted R-squared:  0.3361 
F-statistic: 8.595 on 1 and 14 DF,  p-value: 0.01093

Let us now run a model where the only feature is the interaction term.

Is the interaction term significant at a 5% significance level? What is the adjusted-R squared of the model?

The interaction term is statistically significant at the 5% level. The model, with the interaction term as its only predictor, has an adjusted R-squared of 0.3361, indicating moderate explanatory power.

LS0tDQp0aXRsZTogIkhhbmRvdXRfUmVnIg0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOiBkZWZhdWx0DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgZGZfcHJpbnQ6IHBhZ2VkDQogIHBkZl9kb2N1bWVudDogZGVmYXVsdA0KLS0tDQoNCi0tLQ0KdGl0bGU6ICJIYW5kb3V0XzExX3JlZyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCg0KDQoNCmBgYHtyfQ0KZ2V0d2QoKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCiMgbWFrZSBzdXJlIHRoZSBwYWNrYWdlcyBmb3IgdGhpcyBjaGFwdGVyDQojIGFyZSBpbnN0YWxsZWQsIGluc3RhbGwgaWYgbmVjZXNzYXJ5DQpwa2cgPC0gYygiZ2dwbG90MiIsICJzY2FsZXMiLCAibWFwdG9vbHMiLA0KICAgICAgICAgICAgICAic3AiLCAibWFwcyIsICJncmlkIiwgImNhciIgKQ0KbmV3LnBrZyA8LSBwa2dbIShwa2cgJWluJSBpbnN0YWxsZWQucGFja2FnZXMoKSldDQppZiAobGVuZ3RoKG5ldy5wa2cpKSB7DQogIGluc3RhbGwucGFja2FnZXMobmV3LnBrZykgIA0KfQ0KYGBgDQoNCg0KDQpgYGB7cn0NCiMgcmVhZCB0aGUgQ1NWIHdpdGggaGVhZGVycw0KcmVncmVzc2lvbjE8LXJlYWQuY3N2KCJpbmNpZGVudHMuY3N2IiwgaGVhZGVyPVQsc2VwID0iLCIpDQoNCg0KYGBgDQoNCklNcG9ydGluZyB0aGUgZXhjZWwgDQoNCmBgYHtyfQ0KI1ZpZXcocmVncmVzc2lvbjEpDQpgYGANCg0KU28geW91IGNhbiBwdWJsaXNoIGl0DQoNCmBgYHtyfQ0Kc3VtbWFyeShyZWdyZXNzaW9uMSkNCmBgYA0KDQoxNiBwb2ludHMNCmBgYHtyfQ0Kc3RyKHJlZ3Jlc3Npb24xKQ0KYGBgDQoNCg0KYGBge3J9DQpyZWdyZXNzaW9uMSRwb3B1bGF0aW9uIDwtIGFzLm51bWVyaWMoZ3N1YigiLCIsIiIscmVncmVzc2lvbjEkcG9wdWxhdGlvbikpDQpyZWdyZXNzaW9uMSRwb3B1bGF0aW9uDQpgYGANCg0KYGBge3J9DQpzdHIocmVncmVzc2lvbjEkcG9wdWxhdGlvbikNCmBgYA0KDQpUaGUgZGF0YSBpbiBudW1lcmljYWwgbm93DQoNCg0KYGBge3J9DQpyZWdyZXNzaW9uMjwtcmVncmVzc2lvbjFbLC0xXSNuZXcgZGF0YSBmcmFtZSB3aXRoIHRoZSBkZWxldGlvbiBvZiBjb2x1bW4gMSANCmBgYA0KDQpXZSBhcmUgZHJvcGluZyBjb2x1bW4gbnVtYmVyIDEuIFdlIGFyZSBub3QgdXNpbmcgYXJlYQ0KDQpgYGB7cn0NCmhlYWQocmVncmVzc2lvbjIpDQpgYGANCg0KDQoNCg0KYGBge3J9DQpyZWcuZml0MTwtbG0ocmVncmVzc2lvbjEkaW5jaWRlbnRzIH4gcmVncmVzc2lvbjEkcG9wdWxhdGlvbikNCmBgYA0KDQpMaW5lYXIgcmVncmVzc2lvbiBiZXR3ZWVuIGluY2lkZW50cyBhbmQgcG9wdWxhdGlvbiANCg0KDQoNCmBgYHtyfQ0Kc3VtbWFyeShyZWcuZml0MSkNCmBgYA0KV2hhdCBpcyB0aGUgaHlwb3Rlc2lzPyANCg0KUG9wdWxhdGlvbiBpcyBub3QgYWZlY3RzIGluY2lkZW50cy4gDQpXZSBmYWlsIHRvIHJlamVjdCB0aGUgbnVsbC4gYmVjYXVzZSBpcyBub3QgbGVzcyB0aGFuIDAuMDUNCg0KSXMgcG9wdWxhdGlvbiBzaWduaWZpY2FudCBhdCBh
IDUlIHNpZ25pZmljYW5jZSBsZXZlbD8gV2hhdCBpcyBpdCB0aGUgYWRqdXN0ZWQgUiBzcXVhcmVkIG9mIHRoZSBtb2R1bGU/IA0KDQoiUG9wdWxhdGlvbiIgaXMgbm90IGNvbnNpZGVyZWQgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudCBhdCB0aGUgNSUgc2lnbmlmaWNhbmNlIGxldmVsIGJhc2VkIG9uIHRoZSBwLXZhbHVlIChwID0gMC4xNjY5KS4NClRoZSBhZGp1c3RlZCBSLXNxdWFyZWQgaXMgMC4wNjk4LCBpbmRpY2F0aW5nIHRoYXQgdGhlIG1vZGVsIGhhcyBsaW1pdGVkIGV4cGxhbmF0b3J5IHBvd2VyIGluIHByZWRpY3RpbmcgdGhlIHJlc3BvbnNlIHZhcmlhYmxlLg0KDQpgYGB7cn0NCnJlZy5maXQyPC1sbShpbmNpZGVudHMgfiB6b25lK3BvcHVsYXRpb24sIGRhdGEgPSByZWdyZXNzaW9uMSkNCmBgYA0KDQoNCg0KYGBge3J9DQpzdW1tYXJ5KHJlZy5maXQyKQ0KYGBgDQoNClpPTkUgSVMgQ1JJVElDQUwgQU4gQSA1JSBTSUdOSUZJQ0FOVCBMRVZFTC4gDQpUSEUgTU9EVUxFIElTIFNJR05JRklDQU5UIEFTIEEgNSUgU0lHTklGSUNBTlQgTEVWRUwNCg0KQXJlIHBvcCBhbmQgem9uZSBzaWdpbmljYW50IGF0IGEgNSVsZXZlbD8gDQoNCm4gc3VtbWFyeSwgInpvbmUiIGlzIHN0YXRpc3RpY2FsbHkgc2lnbmlmaWNhbnQgYXQgdGhlIDUlIGxldmVsLCB3aGlsZSAicG9wdWxhdGlvbiIgaXMgbm90LiBUaGUgbW9kZWwsIGFzIGEgd2hvbGUsIGlzIHN0YXRpc3RpY2FsbHkgc2lnbmlmaWNhbnQgYmFzZWQgb24gdGhlIEYtc3RhdGlzdGljLiBBZGp1c3RlZCBSLXNxdWFyZWQgaW5kaWNhdGVzIG1vZGVyYXRlIGV4cGxhbmF0b3J5IHBvd2VyLg0KDQoNCmBgYHtyfQ0KcmVncmVzc2lvbjEkem9uZSA8LSBpZmVsc2UocmVncmVzc2lvbjEkem9uZSA9PSAid2VzdCIsIDEsIDApDQpgYGANCg0KRGVmaW5lIHpvbmUgYXMgMSBhbmQgMC4gDQpDcmVhdGluZyBhIG5ldyBjb2x1bW4uIElmIGl0IGlzIHdlc3QgaXMgMSBvdGhlcndpc2UgaXMgMA0KYGBge3J9DQojVmlldyhyZWdyZXNzaW9uMSkNCmBgYA0KDQpJZiB1IHRha2UgdGhlICMgaXQgd2lsbCBoYXZlIHpvbmUgYXMgMSBhbmQgMA0KDQpgYGB7cn0NCnN0cihyZWdyZXNzaW9uMSkNCmBgYA0KDQoNCmBgYHtyfQ0KI3JlZ3Jlc3Npb24xJHpvbmU8LWFzLmludGVnZXIoKHJlZ3Jlc3Npb24xJHpvbmUpLHJlcGxhY2U9VFJVRSkgd2FzIG5vdCBuZWNlc3NhcnkNCmBgYA0KDQoNCg0KDQoNCmBgYHtyfQ0KaW50ZXJhY3Rpb248LXJlZ3Jlc3Npb24xJHpvbmUqcmVncmVzc2lvbjEkcG9wdWxhdGlvbg0KYGBgDQoNCmNyZWF0aW5nIGFuIGludGVyYWN0aW9uIHRlcm0gYmV0d2VlbiB6b25lIGFuZCBwb3B1bGF0aW9uDQoNCg0KYGBge3J9DQpyZWcuZml0MzwtbG0ocmVncmVzc2lvbjEkaW5jaWRlbnRzfmludGVyYWN0aW9uK3JlZ3Jlc3Npb24xJHBvcHVsYXRpb24rcmVncmVzc2lvbjEkem9uZSkNCmBgYA0KDQoNCg0KYGBge3J9DQpzdW1tYXJ5KHJlZy5maXQzKQ0KYGBgDQoNCkJhc2VkIG9uIHRoZSBvdXRwdXQg
b2J0YWluZWQgYWJvdmUsIHBsZWFzZSBhbnN3ZXIgdGhlIGZvbGxvd2luZyBxdWVzdGlvbjoNCg0KSXMgUG9wdWxhdGlvbiBzaWduaWZpY2FudCBhdCBhIDUlIHNpZ25pZmljYW5jZSBsZXZlbD8gDQpJcyBab25lIHNpZ25pZmljYW50IGF0IGEgNSUgc2lnbmlmaWNhbmNlIGxldmVsPw0KSXMgdGhlIGludGVyYWN0aW9uIHRlcm0gc2lnbmlmaWNhbnQgYXQgYSA1JSBzaWduaWZpY2FuY2UgbGV2ZWw/IFdoYXQgaXMgdGhlIGFkanVzdGVkLVIgc3F1YXJlZCBvZiB0aGUgbW9kZWw/DQoNClBvcHVsYXRpb24gaXMgbm90IHN0YXRpc3RpY2FsbHkgc2lnbmlmaWNhbnQgYXQgdGhlIDUlIGxldmVsLg0KWm9uZSBpcyBzdGF0aXN0aWNhbGx5IHNpZ25pZmljYW50IGF0IHRoZSA1JSBsZXZlbC4NClRoZSBpbnRlcmFjdGlvbiB0ZXJtIGlzIG5vdCBzdGF0aXN0aWNhbGx5IHNpZ25pZmljYW50IGF0IHRoZSA1JSBsZXZlbC4NClRoZSBtb2RlbCwgYXMgYSB3aG9sZSwgaXMgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudCBiYXNlZCBvbiB0aGUgRi1zdGF0aXN0aWMuDQpBZGp1c3RlZCBSLXNxdWFyZWQgaW5kaWNhdGVzIG1vZGVyYXRlIGV4cGxhbmF0b3J5IHBvd2VyLg0KDQoNCmBgYHtyfQ0KcmVnLmZpdDQ8LWxtKHJlZ3Jlc3Npb24xJGluY2lkZW50c35pbnRlcmFjdGlvbikNCmBgYA0KDQoNCmBgYHtyfQ0Kc3VtbWFyeShyZWcuZml0NCkNCmBgYA0KDQpMZXQgdXMgbm93IHJ1biBhIG1vZGVsIHdoZXJlIHRoZSBvbmx5IGZlYXR1cmUgaXMgdGhlIGludGVyYWN0aW9uIHRlcm0uIA0KDQpJcyB0aGUgaW50ZXJhY3Rpb24gdGVybSBzaWduaWZpY2FudCBhdCBhIDUlIHNpZ25pZmljYW5jZSBsZXZlbD8gV2hhdCBpcyB0aGUgYWRqdXN0ZWQtUiBzcXVhcmVkIG9mIHRoZSBtb2RlbD8NCg0KVGhlIGludGVyYWN0aW9uIHRlcm0gaXMgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudCBhdCB0aGUgNSUgbGV2ZWwuDQpUaGUgbW9kZWwsIGludGVyYWN0aW9uIHRlcm0gYXMgYSBwcmVkaWN0b3IsIGhhcyBhIG1vZGVyYXRlIGV4cGxhbmF0b3J5IHBvd2VyDQo=