Problem One: Fill in the Missing Values
# t statistic = estimate / standard error: how far the estimate is from zero,
# measured in units of how far we would expect it to be by chance.
Tint <- -17.5791 / 6.758
print(Tint)
## [1] -2.601228
# t statistic for the speed slope: estimate / standard error.
Tspeed <- 3.9324 / 0.4155
Tspeed
## [1] 9.46426
# P-value (from the t statistic).
# The tail probability must be evaluated at the t statistic (Tspeed), not at
# the raw coefficient estimate 3.9324; abs() keeps the upper-tail lookup valid
# for either sign. 48 is the residual degrees of freedom.
# speedp is the one-sided tail; doubling it gives the two-sided p-value.
speedp <- pt(abs(Tspeed), df = 48, lower.tail = FALSE)
speedp * 2
## approximately 1.49e-12 (not re-knitted after the fix; re-run to get all digits)
#The residual SE has 48 degrees of freedom (n - 2) because we need to estimate both B1 and B0
# Multiple R-squared is the share of the total sum of squares that the model
# explains: SS_explained / (SS_explained + SS_residual).
Rsq <- 21186 / sum(21186, 11354)
print(Rsq)
## [1] 0.6510756
# ANOVA table
# Mean square = SS / df; the F statistic has 1 and 48 degrees of freedom.
MSreg <- 21186 / 1
MSreg
## [1] 21186
MSres <- 11354 / 48
MSres
## [1] 236.5417
# F statistic = regression mean square / residual mean square.
Fstat <- MSreg / MSres
Fstat
## [1] 89.56562
# Upper-tail p-value of the F test, evaluated at the computed statistic rather
# than a hand-rounded 89.6 so that no precision is lost.
pf(Fstat, df1 = 1, df2 = 48, lower.tail = FALSE)
## approximately 1.49e-12 (not re-knitted after switching from 89.6 to Fstat)
Problem 2: Carseats
#install.packages("ISLR")
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.3
# Load the Carseats data set (made available by library(ISLR) above) and list
# its column names.
data(Carseats)
names(Carseats)
## [1] "Sales" "CompPrice" "Income" "Advertising" "Population"
## [6] "Price" "ShelveLoc" "Age" "Education" "Urban"
## [11] "US"
#a. Sales is a numeric variable, Price is a numeric variable, Urban is a factor with two levels (No and Yes), and US is a factor with two levels (No and Yes)
# str() shows the storage type of every column, which is what the answer above is read from.
str(Carseats)
## 'data.frame': 400 obs. of 11 variables:
## $ Sales : num 9.5 11.22 10.06 7.4 4.15 ...
## $ CompPrice : num 138 111 113 117 141 124 115 136 132 132 ...
## $ Income : num 73 48 35 100 64 113 105 81 110 113 ...
## $ Advertising: num 11 16 10 4 3 13 0 15 0 0 ...
## $ Population : num 276 260 269 466 340 501 45 425 108 131 ...
## $ Price : num 120 83 80 97 128 72 108 120 124 124 ...
## $ ShelveLoc : Factor w/ 3 levels "Bad","Good","Medium": 1 2 3 3 1 1 3 2 3 3 ...
## $ Age : num 42 65 59 55 38 78 71 67 76 76 ...
## $ Education : num 17 10 12 14 13 16 15 10 10 17 ...
## $ Urban : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 1 1 ...
## $ US : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 1 2 1 2 ...
# b. Model: regress Sales on Price plus the two Yes/No factors Urban and US.
mod <- lm(
  formula = Sales ~ Price + Urban + US,
  data = Carseats
)
print(summary(mod))
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
#c. With all else held constant...
#For every one-unit increase in Price, Sales drop by about 0.054 units on average (the slope of the fitted line)
#If the store is in an urban location (UrbanYes), Sales are about 0.022 units lower than at a non-urban store (y-intercept shift); this difference is not statistically significant (p = 0.936)
#If the store is in the US (USYes), Sales are about 1.2 units higher than at a non-US store (y-intercept shift)
#d. Sales = 13.043 - 0.054*Price - 0.022*Urban + 1.2*US
#Urban and US are categorical: "No" is coded as 0, so that term drops out of the equation, while "Yes" is coded as 1 and shifts the y-intercept by the corresponding coefficient
#e. US and Price both have significant p-values, so I can reject the null hypothesis for each and conclude that there is a relationship between Sales and Price and between Sales and US. Urban (p = 0.936) is not significant, so I fail to reject its null hypothesis.
# f. Reduced model: the same regression with the Urban term left out.
mod2 <- lm(
  formula = Sales ~ Price + US,
  data = Carseats
)
print(summary(mod2))
##
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
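#g. Both mod and mod2 have Multiple R-squared = 0.2393 (this is R-squared, not MSE), so they explain the same share (about 24%) of the variation in Sales. mod2 has a slightly lower residual standard error (2.469 vs 2.472) and a slightly higher adjusted R-squared (0.2354 vs 0.2335), so dropping Urban loses nothing and gives the simpler model a marginal edge.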
#h. CIs: Because we are estimating, we cannot know exactly where the true coefficients are. A 95% CI means that, absent any other information, 95% of the intervals constructed this way will contain the true parameter value (so each true coefficient is plausibly somewhere between its two bounds)
# 95% confidence intervals for every coefficient of the full model
# (0.95 is confint()'s default level, written out here for clarity).
print(confint(mod, level = 0.95))
## 2.5 % 97.5 %
## (Intercept) 11.76359670 14.32334118
## Price -0.06476419 -0.04415351
## UrbanYes -0.55597316 0.51214085
## USYes 0.69130419 1.70984121