# Question 2 ----
# Read the Auto data set from the book's website. "?" entries in the CSV are
# treated as missing, and every row containing a missing value is dropped.
Auto <- na.omit(
  read.csv(
    file = "https://www.statlearning.com/s/Auto.csv",
    header = TRUE,
    na.strings = "?"
  )
)
# Plot the whole data frame (one panel per pair of columns).
plot(Auto)

# Keep only the first eight columns (mpg through origin, as listed in the
# correlation output below) -- presumably to leave out the non-numeric car
# name -- and print their pairwise correlation matrix.
Auto_no_name <- Auto[, seq_len(8L)]
cor(x = Auto_no_name)
## mpg cylinders displacement horsepower weight
## mpg 1.0000000 -0.7776175 -0.8051269 -0.7784268 -0.8322442
## cylinders -0.7776175 1.0000000 0.9508233 0.8429834 0.8975273
## displacement -0.8051269 0.9508233 1.0000000 0.8972570 0.9329944
## horsepower -0.7784268 0.8429834 0.8972570 1.0000000 0.8645377
## weight -0.8322442 0.8975273 0.9329944 0.8645377 1.0000000
## acceleration 0.4233285 -0.5046834 -0.5438005 -0.6891955 -0.4168392
## year 0.5805410 -0.3456474 -0.3698552 -0.4163615 -0.3091199
## origin 0.5652088 -0.5689316 -0.6145351 -0.4551715 -0.5850054
## acceleration year origin
## mpg 0.4233285 0.5805410 0.5652088
## cylinders -0.5046834 -0.3456474 -0.5689316
## displacement -0.5438005 -0.3698552 -0.6145351
## horsepower -0.6891955 -0.4163615 -0.4551715
## weight -0.4168392 -0.3091199 -0.5850054
## acceleration 1.0000000 0.2903161 0.2127458
## year 0.2903161 1.0000000 0.1815277
## origin 0.2127458 0.1815277 1.0000000
# Multiple linear regression of mpg on the seven other columns of
# Auto_no_name, followed by the coefficient table and fit statistics.
mpg_regression <- lm(
  formula = mpg ~ cylinders + displacement + horsepower + weight +
    acceleration + year + origin,
  data = Auto_no_name
)
summary(mpg_regression)
##
## Call:
## lm(formula = mpg ~ cylinders + displacement + horsepower + weight +
## acceleration + year + origin, data = Auto_no_name)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5903 -2.1565 -0.1169 1.8690 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.218435 4.644294 -3.707 0.00024 ***
## cylinders -0.493376 0.323282 -1.526 0.12780
## displacement 0.019896 0.007515 2.647 0.00844 **
## horsepower -0.016951 0.013787 -1.230 0.21963
## weight -0.006474 0.000652 -9.929 < 2e-16 ***
## acceleration 0.080576 0.098845 0.815 0.41548
## year 0.750773 0.050973 14.729 < 2e-16 ***
## origin 1.426141 0.278136 5.127 4.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8182
## F-statistic: 252.4 on 7 and 384 DF, p-value: < 2.2e-16
# Diagnostic plots for the linear fit. `which` is spelled out with plot.lm's
# default selection: 1 = residuals vs fitted, 2 = normal Q-Q,
# 3 = scale-location, 5 = residuals vs leverage.
plot(mpg_regression, which = c(1L, 2L, 3L, 5L))




# Linear model with all seven main effects plus a hand-picked set of pairwise
# interactions involving cylinders and displacement. `a * b` expands to
# `a + b + a:b`; the main effects are already listed, so each product adds
# only the `a:b` column to the fit.
mpg_regression_interactions <- lm(
  formula = mpg ~ cylinders + displacement + horsepower + weight +
    acceleration + year + origin +
    cylinders * displacement + cylinders * horsepower + cylinders * weight +
    cylinders * acceleration + cylinders * year + cylinders * origin +
    displacement * horsepower + displacement * weight +
    displacement * acceleration + displacement * year,
  data = Auto_no_name
)
summary(mpg_regression_interactions)
##
## Call:
## lm(formula = mpg ~ cylinders + displacement + horsepower + weight +
## acceleration + year + origin + cylinders * displacement +
## cylinders * horsepower + cylinders * weight + cylinders *
## acceleration + cylinders * year + cylinders * origin + displacement *
## horsepower + displacement * weight + displacement * acceleration +
## displacement * year, data = Auto_no_name)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3915 -1.3790 -0.0618 1.1901 12.6516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.772e+01 2.242e+01 -1.682 0.0934 .
## cylinders 4.843e+00 8.399e+00 0.577 0.5646
## displacement 6.672e-03 1.413e-01 0.047 0.9624
## horsepower -1.734e-01 6.898e-02 -2.514 0.0123 *
## weight -7.246e-03 3.129e-03 -2.316 0.0211 *
## acceleration -6.182e-01 4.343e-01 -1.423 0.1554
## year 1.415e+00 2.497e-01 5.667 2.91e-08 ***
## origin -1.253e+00 1.412e+00 -0.888 0.3751
## cylinders:displacement -1.572e-03 3.734e-03 -0.421 0.6740
## cylinders:horsepower 2.249e-02 1.944e-02 1.157 0.2481
## cylinders:weight -1.017e-04 8.260e-04 -0.123 0.9021
## cylinders:acceleration 2.972e-01 1.606e-01 1.851 0.0649 .
## cylinders:year -1.495e-01 9.694e-02 -1.542 0.1239
## cylinders:origin 4.544e-01 3.307e-01 1.374 0.1703
## displacement:horsepower -1.124e-04 2.729e-04 -0.412 0.6808
## displacement:weight 1.540e-05 1.049e-05 1.468 0.1429
## displacement:acceleration -5.566e-03 2.733e-03 -2.037 0.0424 *
## displacement:year 6.069e-04 1.636e-03 0.371 0.7108
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.827 on 374 degrees of freedom
## Multiple R-squared: 0.8746, Adjusted R-squared: 0.8689
## F-statistic: 153.4 on 17 and 374 DF, p-value: < 2.2e-16
# Regress mpg on the square of each predictor.
#
# Inside a model formula `^` is the term-crossing operator, not arithmetic
# exponentiation, so `(x)^2` collapses to the plain term `x`. Wrapping each
# power in I() makes R evaluate it as the numeric square.
#
# NOTE: the summary printed directly below in this document was produced by
# the earlier `(x)^2` formula -- it is identical to the untransformed linear
# fit -- and has to be regenerated by re-running this chunk.
mpg_regression_squared <- lm(
  mpg ~ I(cylinders^2) + I(displacement^2) + I(horsepower^2) + I(weight^2) +
    I(acceleration^2) + I(year^2) + I(origin^2),
  data = Auto_no_name
)
summary(mpg_regression_squared)
##
## Call:
## lm(formula = mpg ~ (cylinders)^2 + (displacement)^2 + (horsepower)^2 +
## (weight)^2 + (acceleration)^2 + (year)^2 + (origin)^2, data = Auto_no_name)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5903 -2.1565 -0.1169 1.8690 13.0604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.218435 4.644294 -3.707 0.00024 ***
## cylinders -0.493376 0.323282 -1.526 0.12780
## displacement 0.019896 0.007515 2.647 0.00844 **
## horsepower -0.016951 0.013787 -1.230 0.21963
## weight -0.006474 0.000652 -9.929 < 2e-16 ***
## acceleration 0.080576 0.098845 0.815 0.41548
## year 0.750773 0.050973 14.729 < 2e-16 ***
## origin 1.426141 0.278136 5.127 4.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.328 on 384 degrees of freedom
## Multiple R-squared: 0.8215, Adjusted R-squared: 0.8182
## F-statistic: 252.4 on 7 and 384 DF, p-value: < 2.2e-16
# Same seven predictors as the linear fit, each entered on the natural-log
# scale; function calls such as log() are evaluated as written in a formula.
mpg_regression_log <- lm(
  formula = mpg ~ log(cylinders) + log(displacement) + log(horsepower) +
    log(weight) + log(acceleration) + log(year) + log(origin),
  data = Auto_no_name
)
summary(mpg_regression_log)
##
## Call:
## lm(formula = mpg ~ log(cylinders) + log(displacement) + log(horsepower) +
## log(weight) + log(acceleration) + log(year) + log(origin),
## data = Auto_no_name)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5987 -1.8172 -0.0181 1.5906 12.8132
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -66.5643 17.5053 -3.803 0.000167 ***
## log(cylinders) 1.4818 1.6589 0.893 0.372273
## log(displacement) -1.0551 1.5385 -0.686 0.493230
## log(horsepower) -6.9657 1.5569 -4.474 1.01e-05 ***
## log(weight) -12.5728 2.2251 -5.650 3.12e-08 ***
## log(acceleration) -4.9831 1.6078 -3.099 0.002082 **
## log(year) 54.9857 3.5555 15.465 < 2e-16 ***
## log(origin) 1.5822 0.5083 3.113 0.001991 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.069 on 384 degrees of freedom
## Multiple R-squared: 0.8482, Adjusted R-squared: 0.8454
## F-statistic: 306.5 on 7 and 384 DF, p-value: < 2.2e-16
# Same seven predictors as the linear fit, each entered as its square root.
mpg_regression_sqrt <- lm(
  formula = mpg ~ sqrt(cylinders) + sqrt(displacement) + sqrt(horsepower) +
    sqrt(weight) + sqrt(acceleration) + sqrt(year) + sqrt(origin),
  data = Auto_no_name
)
summary(mpg_regression_sqrt)
##
## Call:
## lm(formula = mpg ~ sqrt(cylinders) + sqrt(displacement) + sqrt(horsepower) +
## sqrt(weight) + sqrt(acceleration) + sqrt(year) + sqrt(origin),
## data = Auto_no_name)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.5250 -1.9822 -0.1111 1.7347 13.0681
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -49.79814 9.17832 -5.426 1.02e-07 ***
## sqrt(cylinders) -0.23699 1.53753 -0.154 0.8776
## sqrt(displacement) 0.22580 0.22940 0.984 0.3256
## sqrt(horsepower) -0.77976 0.30788 -2.533 0.0117 *
## sqrt(weight) -0.62172 0.07898 -7.872 3.59e-14 ***
## sqrt(acceleration) -0.82529 0.83443 -0.989 0.3233
## sqrt(year) 12.79030 0.85891 14.891 < 2e-16 ***
## sqrt(origin) 3.26036 0.76767 4.247 2.72e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.21 on 384 degrees of freedom
## Multiple R-squared: 0.8338, Adjusted R-squared: 0.8308
## F-statistic: 275.3 on 7 and 384 DF, p-value: < 2.2e-16
# Question 3 ----
# Take the Carseats data from the ISLR package and recode the two Yes/No
# columns (US, Urban) as numeric 0/1 indicators -- one indicator per level --
# then drop the original columns. New columns are appended in the order
# US_yes, Urban_yes, Urban_no, US_no.
carseats <- ISLR::Carseats
carseats_new <- carseats %>%
  mutate(
    US_yes = as.numeric(US != "No"),
    Urban_yes = as.numeric(Urban != "No"),
    Urban_no = as.numeric(Urban != "Yes"),
    US_no = as.numeric(US != "Yes")
  ) %>%
  select(-c("Urban", "US"))
# Regress Sales on Price plus one 0/1 indicator for each binary factor.
#
# Urban_no equals 1 - Urban_yes and US_no equals 1 - US_yes, so putting both
# members of a pair in a model that already has an intercept is perfectly
# collinear: lm() cannot estimate the redundant terms and reports them as NA
# ("not defined because of singularities"). Only one indicator per factor is
# therefore included.
#
# NOTE: the estimates for Price, Urban_yes and US_yes are unchanged by this;
# in the summary printed below, only the echoed Call and the two NA rows
# reflect the earlier formula and will differ after re-running.
sales_regression <- lm(
  Sales ~ Price + Urban_yes + US_yes,
  data = carseats_new
)
summary(sales_regression)
##
## Call:
## lm(formula = Sales ~ Price + Urban_yes + Urban_no + US_yes +
## US_no, data = carseats_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9206 -1.6220 -0.0564 1.5786 7.0581
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.043469 0.651012 20.036 < 2e-16 ***
## Price -0.054459 0.005242 -10.389 < 2e-16 ***
## Urban_yes -0.021916 0.271650 -0.081 0.936
## Urban_no NA NA NA NA
## US_yes 1.200573 0.259042 4.635 4.86e-06 ***
## US_no NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.472 on 396 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2335
## F-statistic: 41.52 on 3 and 396 DF, p-value: < 2.2e-16
# Reduced model: Sales on Price and the US indicator only, leaving out the
# urban indicator.
sales_regression_smaller <- lm(
  formula = Sales ~ Price + US_yes,
  data = carseats_new
)
summary(sales_regression_smaller)
##
## Call:
## lm(formula = Sales ~ Price + US_yes, data = carseats_new)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.9269 -1.6286 -0.0574 1.5766 7.0515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.03079 0.63098 20.652 < 2e-16 ***
## Price -0.05448 0.00523 -10.416 < 2e-16 ***
## US_yes 1.19964 0.25846 4.641 4.71e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.469 on 397 degrees of freedom
## Multiple R-squared: 0.2393, Adjusted R-squared: 0.2354
## F-statistic: 62.43 on 2 and 397 DF, p-value: < 2.2e-16
# 95% confidence intervals for the coefficients of sales_regression, one
# coefficient per call. The intercept interval was left disabled:
# confint(sales_regression, intercept, level = 0.95)
confint(object = sales_regression, parm = "Price", level = 0.95)
## 2.5 % 97.5 %
## Price -0.06476419 -0.04415351
confint(object = sales_regression, parm = "Urban_yes", level = 0.95)
## 2.5 % 97.5 %
## Urban_yes -0.5559732 0.5121409
confint(object = sales_regression, parm = "US_yes", level = 0.95)
## 2.5 % 97.5 %
## US_yes 0.6913042 1.709841
# Diagnostic plots for the Sales model. `which` is spelled out with plot.lm's
# default selection: 1 = residuals vs fitted, 2 = normal Q-Q,
# 3 = scale-location, 5 = residuals vs leverage.
plot(sales_regression, which = c(1L, 2L, 3L, 5L))




# Question 4 ----
# Experiment 1: two independently drawn standard-normal samples of size 100.
# Their sums of squares differ, and so do the slopes obtained by regressing
# y on x and x on y.
x <- rnorm(n = 100)
y <- rnorm(n = 100)
sum(x * x)
## [1] 92.97771
sum(y * y)
## [1] 95.77641
lm(formula = y ~ x)
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## -0.04572 0.07586
lm(formula = x ~ y)
##
## Call:
## lm(formula = x ~ y)
##
## Coefficients:
## (Intercept) y
## 0.08322 0.07325
# Experiment 2: y is a negated random permutation of x, so both vectors hold
# the same magnitudes. Their sums of squares are therefore equal, and the
# slope from regressing y on x matches the slope from regressing x on y.
x <- rnorm(n = 100)
y <- -x[sample.int(length(x))]
sum(x * x)
## [1] 99.52325
sum(y * y)
## [1] 99.52325
lm(formula = y ~ x)
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## -0.13059 -0.07878
lm(formula = x ~ y)
##
## Call:
## lm(formula = x ~ y)
##
## Coefficients:
## (Intercept) y
## 0.13059 -0.07878