pairs(Auto)
cor(Auto[1:8])
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## acceleration year origin
## mpg 0.4233285 0.5805410 0.5652088
## cylinders -0.5046834 -0.3456474 -0.5689316
## displacement -0.5438005 -0.3698552 -0.6145351
## horsepower -0.6891955 -0.4163615 -0.4551715
## weight -0.4168392 -0.3091199 -0.5850054
## acceleration 1.0000000 0.2903161 0.2127458
## year 0.2903161 1.0000000 0.1815277
## origin 0.2127458 0.1815277 1.0000000
names(Auto)
## [1] "mpg" "cylinders" "displacement" "horsepower" "weight"
## [6] "acceleration" "year" "origin" "name"
auto.lm = lm(mpg ~ .-name, Auto)
summary(auto.lm)
##
## Call:
## lm(formula = mpg ~ . - name, data = Auto)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5903 -2.1565 -0.1169 1.8690 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.218435 4.644294 -3.707 0.00024 ***
## cylinders -0.493376 0.323282 -1.526 0.12780
## displacement 0.019896 0.007515 2.647 0.00844 **
## horsepower -0.016951 0.013787 -1.230 0.21963
## weight -0.006474 0.000652 -9.929 < 2e-16 ***
## acceleration 0.080576 0.098845 0.815 0.41548
## year 0.750773 0.050973 14.729 < 2e-16 ***
## origin 1.426141 0.278136 5.127 4.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8182
## F-statistic: 252.4 on 7 and 384 DF, p-value: < 2.2e-16
Looking at the summary of the linear model we can see that we have a very small p-value which leads us to believe that there is a relationship between mpg and its predictors.
Predictors that are statistically significant are displacement, weight, year, and origin.
We can assume that with one unit increase in mpg we can also expect a .75 increase in year.
par(mfrow = c(2, 2))
plot(auto.lm)
auto.lm.int = lm(mpg ~ displacement * cylinders + displacement * weight, Auto[, 1:8])
summary(auto.lm.int)
##
## Call:
## lm(formula = mpg ~ displacement * cylinders + displacement *
## weight, data = Auto[, 1:8])
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2934 -2.5184 -0.3476 1.8399 17.7723
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.262e+01 2.237e+00 23.519 < 2e-16 ***
## displacement -7.351e-02 1.669e-02 -4.403 1.38e-05 ***
## cylinders 7.606e-01 7.669e-01 0.992 0.322
## weight -9.888e-03 1.329e-03 -7.438 6.69e-13 ***
## displacement:cylinders -2.986e-03 3.426e-03 -0.872 0.384
## displacement:weight 2.128e-05 5.002e-06 4.254 2.64e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.103 on 386 degrees of freedom
## Multiple R-squared: 0.7272, Adjusted R-squared: 0.7237
## F-statistic: 205.8 on 5 and 386 DF, p-value: < 2.2e-16
par(mfrow = c(2, 2))
plot(log(Auto$weight), Auto$mpg)
plot(sqrt(Auto$weight), Auto$mpg)
plot((Auto$weight)^2, Auto$mpg)
data(Carseats)
#names(Carseats)
car.seats = lm(Sales ~ Price + Urban + US, Carseats)
summary(car.seats)
##
## Call:
## lm(formula = Sales ~ Price + Urban + US, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## UrbanYes -0.021916 0.271650 -0.081 0.936
## USYes 1.200573 0.259042 4.635 4.86e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
Looking at the linear model we first see that Price is significant in relation to sales, while also being able to say with a one unit increase in price we can expect a -,054 decrease in sales. Next is UrbanYes, which we see is not significant toward our sales variables, while also being able to say that in urban areas sales are -.021 less than that in rural areas. Lastly we see that the USYes variable is also significant and can assume that with US stores have 1.2 more sales than those that are not in the US.
Sales=13.0434689+(−0.054)×Price+(−0.021)×Urban+(1.2)×US+ε
As mentioned in part b. we can reject the null hypothesis for Price and US.
car.seats.back = lm(Sales ~ US + Price, Carseats)
summary(car.seats.back)
##
## Call:
## lm(formula = Sales ~ US + Price, data = Carseats)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## USYes 1.19964 0.25846 4.641 4.71e-06 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
To get the idea of which model fits best, we can look at the r2 for each of the models. Surprisingly, both r2 are identical for each model.
confint(car.seats.back, level = .95)
## 2.5 % 97.5 %
## (Intercept) 11.79032020 14.27126531
## USYes 0.69151957 1.70776632
## Price -0.06475984 -0.04419543
par(mfrow = c(2, 2))
plot(car.seats.back)
##12.
###a. They are the same when each of their denominators are equal to one another.
###b.
set.seed(6)
x = 1:100
sum(x^2)
## [1] 338350
y <- 2 * x + rnorm(100, sd = 0.1)
sum(y^2)
## [1] 1353274
y.lm <- lm(y ~ x + 0)
x.lm <- lm(x ~ y + 0)
summary(y.lm)
##
## Call:
## lm(formula = y ~ x + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.186671 -0.064202 -0.009248 0.063055 0.268433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 1.9999059 0.0001774 11272 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1032 on 99 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.27e+08 on 1 and 99 DF, p-value: < 2.2e-16
summary(x.lm)
##
## Call:
## lm(formula = x ~ y + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.134160 -0.031481 0.004661 0.032147 0.093411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## y 5.000e-01 4.436e-05 11272 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05161 on 99 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.27e+08 on 1 and 99 DF, p-value: < 2.2e-16
###c
x = 1:100
sum(x^2)
## [1] 338350
y = 100:1
sum(y^2)
## [1] 338350
Y <- lm(y ~ x + 0)
X <- lm(x ~ y + 0)
summary(Y)
##
## Call:
## lm(formula = y ~ x + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.75 -12.44 24.87 62.18 99.49
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 0.5075 0.0866 5.86 6.09e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.37 on 99 degrees of freedom
## Multiple R-squared: 0.2575, Adjusted R-squared: 0.25
## F-statistic: 34.34 on 1 and 99 DF, p-value: 6.094e-08
summary(X)
##
## Call:
## lm(formula = x ~ y + 0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.75 -12.44 24.87 62.18 99.49
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## y 0.5075 0.0866 5.86 6.09e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.37 on 99 degrees of freedom
## Multiple R-squared: 0.2575, Adjusted R-squared: 0.25
## F-statistic: 34.34 on 1 and 99 DF, p-value: 6.094e-08