Problem One: Fill in the Missing Values

 # Problem One: reconstruct the missing entries of a simple-regression
 # summary/ANOVA table by hand from the values that were given.
 #
 # t statistic = estimate / standard error: how far the estimate is from 0,
 # relative to how far we'd expect it to be from sampling noise alone.
   Tint <- -17.5791 / 6.758
      Tint
## [1] -2.601228
   Tspeed <- 3.9324 / 0.4155
      Tspeed
## [1] 9.46426
  # P-value for the speed coefficient. The tail probability must be taken at
  # the t statistic (Tspeed), not at the raw estimate 3.9324. speedp is the
  # one-sided upper tail; doubling it gives the two-sided p-value.
      speedp <- pt(abs(Tspeed), 48, lower.tail = FALSE)
      (speedp)*2
## [1] approx. 1.5e-12 (not re-knit; agrees with the F-test p-value below because t^2 = F in simple regression)
  #The residual SE has 48 degrees of freedom because we need to estimate both B1 and B0 (df = n - 2)
  #Multiple R squared = SSexp/SStot
      Rsq <- 21186 / (21186 + 11354)
        Rsq
## [1] 0.6510756
 #ANOVA table
    #MS = SS/df (Fstat has df = 1 and 48)
      MSreg <- 21186 / 1
        MSreg
## [1] 21186
      MSres <- 11354 / 48
        MSres
## [1] 236.5417
      Fstat <- MSreg / MSres
        Fstat
## [1] 89.56562
    # Upper-tail F probability; 89.6 is Fstat rounded to one decimal place.
        pf(89.6, 1, 48, lower.tail = FALSE)
## [1] 1.481223e-12

Problem 2: Carseats

# install.packages("ISLR")  # one-time install, left commented out
# Attach ISLR and load its Carseats data set, then list the column names.
library("ISLR")
## Warning: package 'ISLR' was built under R version 4.0.3
data("Carseats")
colnames(Carseats)
##  [1] "Sales"       "CompPrice"   "Income"      "Advertising" "Population" 
##  [6] "Price"       "ShelveLoc"   "Age"         "Education"   "Urban"      
## [11] "US"
#a. Sales is a numeric variable, Price is a numeric variable, Urban is a factor with 2 levels (No and Yes), and US is a factor with two levels (No and Yes)
   # Show each column's type to confirm the answer above.
   str(object = Carseats)
## 'data.frame':    400 obs. of  11 variables:
##  $ Sales      : num  9.5 11.22 10.06 7.4 4.15 ...
##  $ CompPrice  : num  138 111 113 117 141 124 115 136 132 132 ...
##  $ Income     : num  73 48 35 100 64 113 105 81 110 113 ...
##  $ Advertising: num  11 16 10 4 3 13 0 15 0 0 ...
##  $ Population : num  276 260 269 466 340 501 45 425 108 131 ...
##  $ Price      : num  120 83 80 97 128 72 108 120 124 124 ...
##  $ ShelveLoc  : Factor w/ 3 levels "Bad","Good","Medium": 1 2 3 3 1 1 3 2 3 3 ...
##  $ Age        : num  42 65 59 55 38 78 71 67 76 76 ...
##  $ Education  : num  17 10 12 14 13 16 15 10 10 17 ...
##  $ Urban      : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 1 1 ...
##  $ US         : Factor w/ 2 levels "No","Yes": 2 2 2 2 1 2 1 2 1 2 ...
#b. Model: regress Sales on Price plus the two-level factors Urban and US
   mod <- lm(formula = Sales ~ Price + Urban + US, data = Carseats)
   summary(object = mod)
## 
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9206 -1.6220 -0.0564  1.5786  7.0581 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.043469   0.651012  20.036  < 2e-16 ***
## Price       -0.054459   0.005242 -10.389  < 2e-16 ***
## UrbanYes    -0.021916   0.271650  -0.081    0.936    
## USYes        1.200573   0.259042   4.635 4.86e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2335 
## F-statistic: 41.52 on 3 and 396 DF,  p-value: < 2.2e-16
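#c. With all else held constant...
  #For every one-unit increase in Price, Sales drops by about 0.054 (in Sales units, which the ISLR documentation gives as thousands of car seats); this is the slope of the fitted line
  #If the store is in an urban location (UrbanYes), Sales is about 0.022 lower than for a non-urban store (y-intercept shift; not statistically significant, p = 0.936)
  #If the store is in the US (USYes), Sales is about 1.2 higher than for a non-US store (y-intercept shift)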

#d. Sales = 13.043 - 0.054*Price - 0.022*Urban + 1.2*US
        #Urban and US are categorical: "No" is coded as 0, so that term drops out, and "Yes" is coded as 1, which shifts the y-intercept by that coefficient

#e. US and Price both have significant p-values, so I can reject the null hypothesis for each and conclude that there is a relationship between Sales and Price and between Sales and US. Urban (p = 0.936) is not significant, so I fail to reject the null for it.

#f. Smaller model: refit without the Urban predictor
   mod2 <- lm(formula = Sales ~ Price + US, data = Carseats)
   summary(object = mod2)
## 
## Call:
## lm(formula = Sales ~ Price + US, data = Carseats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9269 -1.6286 -0.0574  1.5766  7.0515 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 13.03079    0.63098  20.652  < 2e-16 ***
## Price       -0.05448    0.00523 -10.416  < 2e-16 ***
## USYes        1.19964    0.25846   4.641 4.71e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared:  0.2393, Adjusted R-squared:  0.2354 
## F-statistic: 62.43 on 2 and 397 DF,  p-value: < 2.2e-16
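#g. mod R-squared = 0.2393 and mod2 R-squared = 0.2393: essentially the same fit (residual standard error 2.472 vs 2.469; adjusted R-squared 0.2335 vs 0.2354, so the smaller model is marginally better once the number of predictors is accounted for)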

#h.CIs: Because we are estimating, we cannot say exactly where the true coefficients are. A 95% CI means that, absent any other information, 95% of the intervals constructed like this will contain the true parameter value (so each true coefficient plausibly lies between its two bounds)
   # 95% is confint()'s default level; it is spelled out here for clarity.
   confint(object = mod, level = 0.95)
##                   2.5 %      97.5 %
## (Intercept) 11.76359670 14.32334118
## Price       -0.06476419 -0.04415351
## UrbanYes    -0.55597316  0.51214085
## USYes        0.69130419  1.70984121