---
title: "Handout_Reg"
output:
  html_document:
    df_print: paged
  html_notebook: default
  pdf_document: default
---

# Print the current working directory; the relative path "incidents.csv"
# used by read.csv() below is resolved against it.
getwd()
[1] "/cloud/project"
# Read the incident data; the first row of the CSV holds the column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
regression1 <- read.csv("incidents.csv", header = TRUE, sep = ",")
# View(regression1)
# Per-column summary of the freshly loaded data frame.
summary(regression1)
     area               zone          
 Length:16          Length:16         
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      
  population          incidents     
 Length:16          Min.   : 103.0  
 Class :character   1st Qu.: 277.8  
 Mode  :character   Median : 654.0  
                    Mean   : 695.2  
                    3rd Qu.: 853.0  
                    Max.   :2072.0  
# Inspect the column types of the loaded data frame.
str(regression1)
'data.frame':   16 obs. of  4 variables:
 $ area      : chr  "Boulder" "California-lexington" "Huntsville" "Seattle" ...
 $ zone      : chr  "west" "east" "east" "west" ...
 $ population: chr  "107,353" "326,534" "444,752" "750,000" ...
 $ incidents : int  605 103 161 1703 1003 527 721 704 105 403 ...
# population is read as text with thousands separators (e.g. "107,353").
# Remove the commas, convert the column to numeric, then print it to check.
regression1[["population"]] <- as.numeric(
  gsub(",", "", regression1[["population"]])
)
regression1$population
 [1]  107353  326534  444752  750000   64403
 [6] 2744878 1600000 2333000 1572816  712091
[11] 6900000 2700000 4900000 4200000 5200000
[16] 7100000
# Confirm that population is now stored as a numeric vector.
str(regression1$population)
 num [1:16] 107353 326534 444752 750000 64403 ...
# Build a second data frame without the first column of regression1,
# then preview its first rows.
regression2 <- regression1[, -1]
head(regression2)
# Simple linear regression of incidents on population, followed by the
# coefficient table and fit statistics.
# NOTE(review): the formula references regression1$... directly instead of
# bare column names with data = regression1 (the form reg.fit2 uses). A fit
# written this way cannot be reused with predict(newdata = ...); consider
# lm(incidents ~ population, data = regression1) and re-knitting.
reg.fit1<-lm(regression1$incidents ~ regression1$population)
summary(reg.fit1)

Call:
lm(formula = regression1$incidents ~ regression1$population)

Residuals:
   Min     1Q Median     3Q    Max 
-684.5 -363.5 -156.2  133.9 1164.7 

Coefficients:
                        Estimate Std. Error
(Intercept)            4.749e+02  2.018e+02
regression1$population 8.462e-05  5.804e-05
                       t value Pr(>|t|)  
(Intercept)              2.353   0.0337 *
regression1$population   1.458   0.1669  
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 534.9 on 14 degrees of freedom
Multiple R-squared:  0.1318,    Adjusted R-squared:  0.0698 
F-statistic: 2.126 on 1 and 14 DF,  p-value: 0.1669

Based on the output obtained above, please answer the following questions:

Is Population significant at the 5% significance level? What is the adjusted R-squared of the model?

# Multiple regression: incidents explained by zone and population, with the
# columns looked up in regression1 through the data argument.
reg.fit2 <- lm(
  formula = incidents ~ zone + population,
  data = regression1
)
summary(reg.fit2)

Call:
lm(formula = incidents ~ zone + population, data = regression1)

Residuals:
    Min      1Q  Median      3Q     Max 
-537.21 -273.14  -57.89  188.17  766.03 

Coefficients:
             Estimate Std. Error t value
(Intercept) 1.612e+02  1.675e+02   0.962
zonewest    7.266e+02  1.938e+02   3.749
population  6.557e-05  4.206e-05   1.559
            Pr(>|t|)   
(Intercept)  0.35363   
zonewest     0.00243 **
population   0.14300   
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 384.8 on 13 degrees of freedom
Multiple R-squared:  0.5828,    Adjusted R-squared:  0.5186 
F-statistic: 9.081 on 2 and 13 DF,  p-value: 0.003404

Based on the output obtained above, please answer the following questions:

Are Population and/or Zone significant at the 5% significance level? What is the adjusted R-squared of the model?

# Recode zone as a numeric indicator: 1 where zone is "west", 0 otherwise.
# NOTE(review): this overwrites the column in place, so running this line a
# second time compares the numbers to "west" and would set every row to 0.
regression1$zone <- ifelse(regression1$zone == "west", 1, 0)#Please explain the syntax and the output
#View(regression1)
# Confirm that zone is now numeric.
str(regression1)
'data.frame':   16 obs. of  4 variables:
 $ area      : chr  "Boulder" "California-lexington" "Huntsville" "Seattle" ...
 $ zone      : num  1 0 0 1 1 0 1 1 0 0 ...
 $ population: num  107353 326534 444752 750000 64403 ...
 $ incidents : int  605 103 161 1703 1003 527 721 704 105 403 ...
#regression1$zone<-as.integer((regression1$zone),replace=TRUE) was not necessary
# Elementwise product of the 0/1 zone indicator and population: equal to
# population where zone is 1 and 0 elsewhere.
# NOTE(review): this variable shares its name with base R's interaction()
# function; a distinct name would be clearer.
interaction<-regression1$zone*regression1$population#Explain the syntax
# Regress incidents on the product term, population, and the zone indicator.
# NOTE(review): as with reg.fit1, the formula refers to regression1$... and a
# free-standing vector rather than using data =, which prevents
# predict(newdata = ...) from working as expected.
reg.fit3<-lm(regression1$incidents~interaction+regression1$population+regression1$zone)
summary(reg.fit3)

Call:
lm(formula = regression1$incidents ~ interaction + regression1$population + 
    regression1$zone)

Residuals:
    Min      1Q  Median      3Q     Max 
-540.91 -270.93  -59.56  187.99  767.99 

Coefficients:
                        Estimate Std. Error
(Intercept)            1.659e+02  2.313e+02
interaction            2.974e-06  9.469e-05
regression1$population 6.352e-05  7.868e-05
regression1$zone       7.192e+02  3.108e+02
                       t value Pr(>|t|)  
(Intercept)              0.717   0.4869  
interaction              0.031   0.9755  
regression1$population   0.807   0.4352  
regression1$zone         2.314   0.0392 *
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 400.5 on 12 degrees of freedom
Multiple R-squared:  0.5829,    Adjusted R-squared:  0.4786 
F-statistic: 5.589 on 3 and 12 DF,  p-value: 0.01237

Based on the output obtained above, please answer the following questions:

Is Population significant at the 5% significance level? Is Zone significant at the 5% significance level? Is the interaction term significant at the 5% significance level? What is the adjusted R-squared of the model?

Let us now run a model where the only feature is the interaction term.

Is the interaction term significant at the 5% significance level? What is the adjusted R-squared of the model?

# Regress incidents on the zone-by-population product term alone (no main
# effects), then print the coefficient table and fit statistics.
# NOTE(review): the predictor is the free-standing `interaction` vector, not a
# column supplied through data =; see the same concern on the earlier fits.
reg.fit4<-lm(regression1$incidents~interaction)
summary(reg.fit4)

Call:
lm(formula = regression1$incidents ~ interaction)

Residuals:
    Min      1Q  Median      3Q     Max 
-650.28 -301.09  -83.71  123.23 1103.76 

Coefficients:
             Estimate Std. Error t value
(Intercept) 4.951e+02  1.320e+02   3.751
interaction 1.389e-04  4.737e-05   2.932
            Pr(>|t|)   
(Intercept)  0.00215 **
interaction  0.01093 * 
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 451.9 on 14 degrees of freedom
Multiple R-squared:  0.3804,    Adjusted R-squared:  0.3361 
F-statistic: 8.595 on 1 and 14 DF,  p-value: 0.01093

Which of the models run above would you choose to make predictions? Why?

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQp0aXRsZTogIkhhbmRvdXRfUmVnIgpvdXRwdXQ6CiAgaHRtbF9kb2N1bWVudDoKICAgIGRmX3ByaW50OiBwYWdlZAogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQKICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKLS0tCgotLS0KdGl0bGU6ICJIYW5kb3V0XzExX3JlZyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCgoKYGBge3J9CmdldHdkKCkKYGBgCgoKCgpgYGB7cn0KIyByZWFkIHRoZSBDU1Ygd2l0aCBoZWFkZXJzCnJlZ3Jlc3Npb24xPC1yZWFkLmNzdigiaW5jaWRlbnRzLmNzdiIsIGhlYWRlcj1ULHNlcCA9IiwiKQoKCmBgYAoKCgpgYGB7cn0KI1ZpZXcocmVncmVzc2lvbjEpCmBgYAoKCgpgYGB7cn0Kc3VtbWFyeShyZWdyZXNzaW9uMSkKYGBgCgoKYGBge3J9CnN0cihyZWdyZXNzaW9uMSkKYGBgCgoKYGBge3J9CnJlZ3Jlc3Npb24xJHBvcHVsYXRpb24gPC0gYXMubnVtZXJpYyhnc3ViKCIsIiwiIixyZWdyZXNzaW9uMSRwb3B1bGF0aW9uKSkKcmVncmVzc2lvbjEkcG9wdWxhdGlvbgpgYGAKCmBgYHtyfQpzdHIocmVncmVzc2lvbjEkcG9wdWxhdGlvbikKYGBgCgoKYGBge3J9CnJlZ3Jlc3Npb24yPC1yZWdyZXNzaW9uMVssLTFdI25ldyBkYXRhIGZyYW1lIHdpdGggdGhlIGRlbGV0aW9uIG9mIGNvbHVtbiAxIApgYGAKCgoKYGBge3J9CmhlYWQocmVncmVzc2lvbjIpCmBgYAoKCgoKYGBge3J9CnJlZy5maXQxPC1sbShyZWdyZXNzaW9uMSRpbmNpZGVudHMgfiByZWdyZXNzaW9uMSRwb3B1bGF0aW9uKQpgYGAKCgoKYGBge3J9CnN1bW1hcnkocmVnLmZpdDEpCmBgYAoKCkJhc2VkIG9uIHRoZSBvdXRwdXQgb2J0YWluZWQgYWJvdmUsIHBsZWFzZSBhbnN3ZXIgdGhlIGZvbGxvd2luZyBxdWVzdGlvbjoKCklzIFBvcHVsYXRpb24gc2lnbmlmaWNhbnQgYXQgYSA1JSBzaWduaWZpY2FuY2UgbGV2ZWw/IFdoYXQgaXMgdGhlIGFkanVzdGVkLVIgc3F1YXJlZCBvZiB0aGUgbW9kZWw/CgpgYGB7cn0KcmVnLmZpdDI8LWxtKGluY2lkZW50cyB+IHpvbmUrcG9wdWxhdGlvbiwgZGF0YSA9IHJlZ3Jlc3Npb24xKQpgYGAKCgoKYGBge3J9CnN1bW1hcnkocmVnLmZpdDIpCmBgYAoKQmFzZWQgb24gdGhlIG91dHB1dCBvYnRhaW5lZCBhYm92ZSwgcGxlYXNlIGFuc3dlciB0aGUgZm9sbG93aW5nIHF1ZXN0aW9uOgoKQXJlIFBvcHVsYXRpb24gYW5kL29yIFpvbmUgIHNpZ25pZmljYW50IGF0IGEgNSUgc2lnbmlmaWNhbmNlIGxldmVsPyBXaGF0IGlzIHRoZSBhZGp1c3RlZC1SIHNxdWFyZWQgb2YgdGhlIG1vZGVsPwoKCgoKYGBge3J9CnJlZ3Jlc3Npb24xJHpvbmUgPC0gaWZlbHNlKHJlZ3Jlc3Npb24xJHpvbmUgPT0gIndlc3QiLCAxLCAwKSNQbGVhc2UgZXhwbGFpbiB0aGUgc3ludGF4IGFuZCB0aGUgb3V0cHV0CmBgYAoKCmBgYHtyfQojVmlldyhyZWdyZXNzaW9uMSkKYGBgCgoKCmBgYHtyfQpzdHIocmVncmVzc2lvbjEpCmBgYAoKCmBgYHtyfQojcmVncmVzc2lvbjEk
em9uZTwtYXMuaW50ZWdlcigocmVncmVzc2lvbjEkem9uZSkscmVwbGFjZT1UUlVFKSB3YXMgbm90IG5lY2Vzc2FyeQpgYGAKCgpgYGB7cn0KaW50ZXJhY3Rpb248LXJlZ3Jlc3Npb24xJHpvbmUqcmVncmVzc2lvbjEkcG9wdWxhdGlvbiNFeHBsYWluIHRoZSBzeW50YXgKYGBgCgoKYGBge3J9CnJlZy5maXQzPC1sbShyZWdyZXNzaW9uMSRpbmNpZGVudHN+aW50ZXJhY3Rpb24rcmVncmVzc2lvbjEkcG9wdWxhdGlvbityZWdyZXNzaW9uMSR6b25lKQpgYGAKCgoKYGBge3J9CnN1bW1hcnkocmVnLmZpdDMpCmBgYAoKCkJhc2VkIG9uIHRoZSBvdXRwdXQgb2J0YWluZWQgYWJvdmUsIHBsZWFzZSBhbnN3ZXIgdGhlIGZvbGxvd2luZyBxdWVzdGlvbjoKCklzIFBvcHVsYXRpb24gc2lnbmlmaWNhbnQgYXQgYSA1JSBzaWduaWZpY2FuY2UgbGV2ZWw/IApJcyBab25lIHNpZ25pZmljYW50IGF0IGEgNSUgc2lnbmlmaWNhbmNlIGxldmVsPwpJcyB0aGUgaW50ZXJhY3Rpb24gdGVybSBzaWduaWZpY2FudCBhdCBhIDUlIHNpZ25pZmljYW5jZSBsZXZlbD8gV2hhdCBpcyB0aGUgYWRqdXN0ZWQtUiBzcXVhcmVkIG9mIHRoZSBtb2RlbD8KCi0gUG9wdWxhdGlvbjogTm90IHNpZ25pZmljYW50IGF0IHRoZSA1JSBsZXZlbCAocCA9IDAuNDM1MiA+IDAuMDUpLgoKLSBab25lOiBTaWduaWZpY2FudCBhdCB0aGUgNSUgbGV2ZWwgKHAgPSAwLjAzOTIgPCAwLjA1KS4KCi0gSW50ZXJhY3Rpb24gdGVybTogTm90IHNpZ25pZmljYW50IGF0IHRoZSA1JSBsZXZlbCAocCA9IDAuOTc1NSA+IDAuMDUpLgoKLSBBZGp1c3RlZCBSLXNxdWFyZWQ6IDAuNDc4Ni4KCkxldCB1cyBub3cgcnVuIGEgbW9kZWwgd2hlcmUgdGhlIG9ubHkgZmVhdHVyZSBpcyB0aGUgaW50ZXJhY3Rpb24gdGVybS4gCgpJcyB0aGUgaW50ZXJhY3Rpb24gdGVybSBzaWduaWZpY2FudCBhdCBhIDUlIHNpZ25pZmljYW5jZSBsZXZlbD8gV2hhdCBpcyB0aGUgYWRqdXN0ZWQtUiBzcXVhcmVkIG9mIHRoZSBtb2RlbD8KCi0gSW50ZXJhY3Rpb24gdGVybTogTm90IHNpZ25pZmljYW50IGF0IHRoZSA1JSBsZXZlbCAocCA9IDAuOTc1NSA+IDAuMDUpLgoKLSBBZGp1c3RlZCBSLXNxdWFyZWQ6IDAuNDc4Ni4KCmBgYHtyfQpyZWcuZml0NDwtbG0ocmVncmVzc2lvbjEkaW5jaWRlbnRzfmludGVyYWN0aW9uKQpgYGAKCgpgYGB7cn0Kc3VtbWFyeShyZWcuZml0NCkKYGBgCgpXaGljaCBvZiB0aGUgbW9kZWxzIHJ1biBhYm92ZSB3b3VsZCB5b3UgY2hvb3NlIHRvIG1ha2UgcHJlZGljdGlvbnM/IFdoeT8/CgotIEkgd291bGQgY2hvb3NlIHRoZSBmdWxsIG1vZGVsIHdpdGggcG9wdWxhdGlvbiwgem9uZSwgYW5kIHRoZSBpbnRlcmFjdGlvbiB0ZXJtIGZvciBtYWtpbmcgcHJlZGljdGlvbnMgYmVjYXVzZSBpdCBleHBsYWlucyBtb3JlIG9mIHRoZSB2YXJpYXRpb24gaW4gaW5jaWRlbnRzIChhZGp1c3RlZCBSwrIgPSAwLjQ3ODYpLiBFdmVuIHRob3VnaCBvbmx5IHpvbmUgaXMgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudCwg
dGhpcyBtb2RlbCBpbmNsdWRlcyBib3RoIG1haW4gZWZmZWN0cyBhbmQgdGhlaXIgaW50ZXJhY3Rpb24sIG1ha2luZyBpdCBtb3JlIGNvbXBsZXRlIGFuZCByZWxpYWJsZSB0aGFuIHRoZSBpbnRlcmFjdGlvbi1vbmx5IG1vZGVsLCB3aGljaCBleHBsYWlucyBsZXNzIGFuZCBsZWF2ZXMgb3V0IGltcG9ydGFudCBpbmZvcm1hdGlvbi4KCg==