Using the data “Credit”
Analyze, graphically, the relationship between Education and Income
Suggest which model you could train to better fit the relationship
Fit the following models and analyze their \(\mathrm{R}^2\)
M1
\[ Income_i=\beta_0+\beta_1 Education_i+u_i \]
M2
\[ Income_i=\beta_0+\beta_1 Education_i+\beta_2 Education_i^2+u_i \]
M3
\[ Income_i=\beta_0+\beta_1 Education_i+\beta_2 Education_i^2+\beta_3 Education_i^3+u_i \]
M4
\[ Income_i=\beta_0+\beta_1 Education_i+\beta_2 Education_i^2+\beta_3 Education_i^3+\beta_4 Education_i^4+u_i \]
M5
\[ Income_i=\beta_0+\beta_1 Education_i+\beta_2 Education_i^2+\beta_3 Education_i^3+\beta_4 Education_i^4+...+\beta_7 Education_i^7+u_i \]
M6
\[ Income_i=\beta_0+\beta_1 Education_i+\beta_2 Education_i^2+\beta_3 Education_i^3+\beta_4 Education_i^4+...+\beta_7 Education_i^7+...+\beta_{11} Education_i^{11}+u_i \]
library(ISLR)
# Put the Credit columns on the search path so that Age and Income can be
# referenced by bare name in the calls below.
# NOTE(review): the exercise statement refers to Education, but every model in
# this solution uses Age as the regressor -- confirm which one is intended.
attach(Credit)

# Scatter plot of Income against Age.
plot(Age, Income)

# Polynomial regressions of Income on Age; raw powers are entered through I().
# Degrees fitted: 1, 2, 3, 4, 7 and 11.
mod1 <- lm(Income ~ Age)
mod2 <- lm(Income ~ Age + I(Age^2))
mod3 <- lm(Income ~ Age + I(Age^2) + I(Age^3))
mod4 <- lm(Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4))
mod5 <- lm(
  Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4) +
    I(Age^5) + I(Age^6) + I(Age^7)
)
mod6 <- lm(
  Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4) +
    I(Age^5) + I(Age^6) + I(Age^7) + I(Age^8) +
    I(Age^9) + I(Age^10) + I(Age^11)
)
# Coefficient table and fit statistics for the degree-1 model.
summary(mod1)
##
## Call:
## lm(formula = Income ~ Age)
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.24 -23.49 -10.69 10.29 146.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.2762 5.8755 4.302 2.13e-05 ***
## Age 0.3582 0.1008 3.553 0.000426 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.74 on 398 degrees of freedom
## Multiple R-squared: 0.03074, Adjusted R-squared: 0.02831
## F-statistic: 12.62 on 1 and 398 DF, p-value: 0.0004265
# Coefficient table and fit statistics for the degree-2 model.
summary(mod2)
##
## Call:
## lm(formula = Income ~ Age + I(Age^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.61 -23.29 -11.60 11.82 147.76
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.228025 17.935851 3.581 0.000385 ***
## Age -1.186126 0.679755 -1.745 0.081771 .
## I(Age^2) 0.013847 0.006028 2.297 0.022133 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.56 on 397 degrees of freedom
## Multiple R-squared: 0.04346, Adjusted R-squared: 0.03864
## F-statistic: 9.018 on 2 and 397 DF, p-value: 0.0001479
# Coefficient table and fit statistics for the degree-3 model.
summary(mod3)
##
## Call:
## lm(formula = Income ~ Age + I(Age^2) + I(Age^3))
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.48 -22.55 -10.02 13.28 141.90
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.880e+02 4.835e+01 -3.888 0.000119 ***
## Age 1.429e+01 2.847e+00 5.019 7.86e-07 ***
## I(Age^2) -2.795e-01 5.283e-02 -5.290 2.03e-07 ***
## I(Age^3) 1.741e-03 3.116e-04 5.586 4.33e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.31 on 396 degrees of freedom
## Multiple R-squared: 0.1133, Adjusted R-squared: 0.1066
## F-statistic: 16.87 on 3 and 396 DF, p-value: 2.484e-10
# Coefficient table and fit statistics for the degree-4 model.
summary(mod4)
##
## Call:
## lm(formula = Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4))
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.27 -21.99 -10.35 12.17 141.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.732e+01 1.307e+02 0.133 0.8946
## Age -2.574e+00 1.038e+01 -0.248 0.8042
## I(Age^2) 2.079e-01 2.932e-01 0.709 0.4787
## I(Age^3) -4.177e-03 3.516e-03 -1.188 0.2355
## I(Age^4) 2.568e-05 1.520e-05 1.690 0.0919 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.24 on 395 degrees of freedom
## Multiple R-squared: 0.1197, Adjusted R-squared: 0.1108
## F-statistic: 13.43 on 4 and 395 DF, p-value: 2.87e-10
# Coefficient table and fit statistics for the degree-7 model.
summary(mod5)
##
## Call:
## lm(formula = Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4) +
## I(Age^5) + I(Age^6) + I(Age^7))
##
## Residuals:
## Min 1Q Median 3Q Max
## -41.27 -22.00 -10.20 12.76 138.54
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.236e+03 3.112e+03 1.683 0.0933 .
## Age -7.789e+02 4.446e+02 -1.752 0.0806 .
## I(Age^2) 4.772e+01 2.624e+01 1.819 0.0697 .
## I(Age^3) -1.557e+00 8.306e-01 -1.874 0.0616 .
## I(Age^4) 2.933e-02 1.526e-02 1.922 0.0553 .
## I(Age^5) -3.204e-04 1.631e-04 -1.964 0.0502 .
## I(Age^6) 1.882e-06 9.407e-07 2.001 0.0461 *
## I(Age^7) -4.593e-09 2.263e-09 -2.030 0.0431 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.17 on 392 degrees of freedom
## Multiple R-squared: 0.1299, Adjusted R-squared: 0.1144
## F-statistic: 8.364 on 7 and 392 DF, p-value: 1.553e-09
# Draw a 3 x 2 grid of panels, one per model: each panel shows the scatter of
# Income against Age with that model's fitted curve drawn on top.
par(mfrow = c(3, 2))
age_order <- order(Age)
fits <- list(mod1, mod2, mod3, mod4, mod5, mod6)
curve_cols <- c("green", "red", "orange", "purple", "pink", "yellow")
for (k in seq_along(fits)) {
  plot(Age, Income)
  # Fitted values are reordered by Age so the line is drawn left to right.
  lines(sort(Age), fitted(fits[[k]])[age_order], col = curve_cols[k])
}
As you can see, the \(R^2\) increases with each successive model. But:
- How do you interpret models 1, 2, and 3? Which of these models makes sense in economic and social terms?
- Perform a cross-validation scheme. Which model predicts better?
library(ISLR)
attach(Credit)
## The following objects are masked from Credit (pos = 3):
##
## Age, Balance, Cards, Education, Ethnicity, Gender, ID, Income,
## Limit, Married, Rating, Student
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(ggplot2)
library(lattice)
# Refit the six polynomial specifications of Income on Age (degrees 1, 2, 3,
# 4, 7 and 11) using the attached Credit columns.
mod1 <- lm(Income ~ Age)
mod2 <- lm(Income ~ Age + I(Age^2))
mod3 <- lm(Income ~ Age + I(Age^2) + I(Age^3))
mod4 <- lm(Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4))
mod5 <- lm(
  Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4) +
    I(Age^5) + I(Age^6) + I(Age^7)
)
mod6 <- lm(
  Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4) +
    I(Age^5) + I(Age^6) + I(Age^7) + I(Age^8) +
    I(Age^9) + I(Age^10) + I(Age^11)
)
# 20-fold cross-validation settings shared by every train() call in this
# section. The former `p = 0.75` argument is dropped: caret only uses `p` for
# leave-group-out resampling, so it had no effect with method = "cv".
# savePredictions = TRUE keeps the held-out predictions from each fold.
# NOTE(review): no seed is set in the visible code, so the fold assignment and
# the reported RMSE values will differ from run to run.
train_control <- trainControl(
  method = "cv",
  number = 20,
  savePredictions = TRUE
)

# Cross-validated degree-1 model of Income on Age.
model1_cv <- train(
  Income ~ Age,
  data = Credit,
  trControl = train_control,
  method = "lm"
)
model1_cv
## Linear Regression
##
## 400 samples
## 1 predictor
##
## No pre-processing
## Resampling: Cross-Validated (20 fold)
## Summary of sample sizes: 380, 380, 380, 380, 380, 380, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 34.25382 0.06069146 25.61557
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Cross-validated degree-2 polynomial of Income on Age.
model2_cv <- train(
  Income ~ Age + I(Age^2),
  data = Credit,
  trControl = train_control,
  method = "lm"
)
model2_cv
## Linear Regression
##
## 400 samples
## 1 predictor
##
## No pre-processing
## Resampling: Cross-Validated (20 fold)
## Summary of sample sizes: 380, 380, 380, 380, 380, 380, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 34.01322 0.08911568 25.6452
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Cross-validated degree-3 polynomial of Income on Age.
model3_cv <- train(
  Income ~ Age + I(Age^2) + I(Age^3),
  data = Credit,
  trControl = train_control,
  method = "lm"
)
model3_cv
## Linear Regression
##
## 400 samples
## 1 predictor
##
## No pre-processing
## Resampling: Cross-Validated (20 fold)
## Summary of sample sizes: 380, 380, 380, 380, 380, 380, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 32.37296 0.1702583 24.75844
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Cross-validated degree-4 polynomial of Income on Age.
model4_cv <- train(
  Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4),
  data = Credit,
  trControl = train_control,
  method = "lm"
)
model4_cv
## Linear Regression
##
## 400 samples
## 1 predictor
##
## No pre-processing
## Resampling: Cross-Validated (20 fold)
## Summary of sample sizes: 380, 380, 380, 380, 380, 380, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 32.68155 0.143395 24.70603
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Cross-validated degree-5 polynomial of Income on Age.
# NOTE(review): the highest power here is Age^5, whereas mod5 is fitted with
# powers up to Age^7 -- confirm which specification this comparison should use.
model5_cv <- train(
  Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4) + I(Age^5),
  data = Credit,
  trControl = train_control,
  method = "lm"
)
model5_cv
## Linear Regression
##
## 400 samples
## 1 predictor
##
## No pre-processing
## Resampling: Cross-Validated (20 fold)
## Summary of sample sizes: 380, 380, 380, 380, 380, 380, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 33.20629 0.177188 24.92997
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Cross-validated degree-6 polynomial of Income on Age.
# NOTE(review): the highest power here is Age^6, whereas mod6 is fitted with
# powers up to Age^11 -- confirm which specification this comparison should use.
model6_cv <- train(
  Income ~ Age + I(Age^2) + I(Age^3) + I(Age^4) + I(Age^5) + I(Age^6),
  data = Credit,
  trControl = train_control,
  method = "lm"
)
model6_cv
## Linear Regression
##
## 400 samples
## 1 predictor
##
## No pre-processing
## Resampling: Cross-Validated (20 fold)
## Summary of sample sizes: 380, 380, 380, 380, 380, 380, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 33.89708 0.1335464 25.22238
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Collect the cross-validated RMSE of the six models side by side in a
# one-row data frame; column names are generated from the expressions.
Errors <- data.frame(
  model1_cv$results$RMSE,
  model2_cv$results$RMSE,
  model3_cv$results$RMSE,
  model4_cv$results$RMSE,
  model5_cv$results$RMSE,
  model6_cv$results$RMSE
)
Errors
## model1_cv.results.RMSE model2_cv.results.RMSE model3_cv.results.RMSE
## 1 34.25382 34.01322 32.37296
## model4_cv.results.RMSE model5_cv.results.RMSE model6_cv.results.RMSE
## 1 32.68155 33.20629 33.89708
One interesting question is: when we have a predictive question and we have data, how should we transform the data to get a better model? There are different approaches. Here we’ll follow an interesting one: graphical analysis, whenever you feel it is possible. Let’s see different examples.
Idea 1 Keep a linear model if you don’t find strong evidence (KISS principle: Keep it as simple as possible)
Idea 2
If you need to estimate elasticity, use logarithms (because you have a purpose to do it)
Idea 3
Use graphical analysis. Some examples with the advertising data set:
library(readr)
# Load the advertising data from the CSV file given by a relative path.
Advertising <- read_csv("Advertising.csv")
## New names:
## Rows: 200 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (5): ...1, TV, radio, newspaper, sales
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Open the data in the data viewer, then plot the sales column against the
# TV column.
View(Advertising)
plot(Advertising$TV,Advertising$sales)
As you can see, it looks like a logarithmic relationship
\[ sales_i=\beta_0+\beta_1 \log TV_i+u_i, \]
However, as we can see, the error grows with the values of advertising. This is called “Heteroskedasticity”, meaning that the variance of the error is not constant and it is “heterogeneous”
In this case, it usually works well to transform both sides using logarithms:
\[ \log sales_i=\beta_0+\beta_1 \log TV_i+u_i, \]
library(ISLR)
attach(Credit)
## The following objects are masked from Credit (pos = 7):
##
## Age, Balance, Cards, Education, Ethnicity, Gender, ID, Income,
## Limit, Married, Rating, Student
## The following objects are masked from Credit (pos = 8):
##
## Age, Balance, Cards, Education, Ethnicity, Gender, ID, Income,
## Limit, Married, Rating, Student
# Reload the advertising data from the CSV file.
Advertising <- read_csv("Advertising.csv")
## New names:
## • `` -> `...1`
## Rows: 200 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (5): ...1, TV, radio, newspaper, sales
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Same scatter with both variables log-transformed.
plot(log(Advertising$TV),log(Advertising$sales))
As you can see, this transformation allows us to use a linear model.
Using the data “Advertising”
How do you interpret all the elements of the output? What should we do, in your opinion, with a non-significant variable?
Use the “predict” function with the sample data. Compare the result with the fitted values
Use the “predict” function to forecast sales if we invest \(TV=100\), \(radio=40\) and \(newspaper=24.2\). How do you interpret the output?
#a)
library(readr)
# Load the advertising data from the CSV file given by a relative path.
Advertising <- read_csv("Advertising.csv")
## New names:
## Rows: 200 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (5): ...1, TV, radio, newspaper, sales
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Inspect the data in the data viewer.
View(Advertising)

# Log-log regression of sales on the three advertising channels.
# radio is shifted by 1 before taking logs -- presumably because it contains
# zeros; confirm against the data.
m1 <- lm(
  log(sales) ~ log(TV) + log(radio + 1) + log(newspaper),
  data = Advertising
)
summary(m1)
##
## Call:
## lm(formula = log(sales) ~ log(TV) + log(radio + 1) + log(newspaper),
## data = Advertising)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.47874 -0.06376 -0.01975 0.04522 0.35822
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.297808 0.043143 6.903 6.91e-11 ***
## log(TV) 0.351882 0.006977 50.436 < 2e-16 ***
## log(radio + 1) 0.202757 0.007885 25.714 < 2e-16 ***
## log(newspaper) 0.011780 0.007350 1.603 0.111
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09881 on 196 degrees of freedom
## Multiple R-squared: 0.944, Adjusted R-squared: 0.9431
## F-statistic: 1101 on 3 and 196 DF, p-value: < 2.2e-16
We should be cautious with non-significant variables. A variable can be non-significant if the standard error is high (even if the effect is relevant). We recommend doing cross-validation with and without the “non-significant” variable and then making a decision.
#b)
library(readr)
# Reload the advertising data from the CSV file.
Advertising <- read_csv("Advertising.csv")
## New names:
## Rows: 200 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (5): ...1, TV, radio, newspaper, sales
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Rebuild the regressor columns of the estimation sample and ask m1 for its
# predictions on them.
predictors <- data.frame(
  TV = Advertising$TV,
  radio = Advertising$radio,
  newspaper = Advertising$newspaper
)
predict(m1, newdata = predictors)
## 1 2 3 4 5 6 7 8
## 3.0032021 2.4276993 2.1290192 2.8716735 2.6750109 1.9026841 2.4745431 2.6252954
## 9 10 11 12 13 14 15 16
## 1.2843731 2.4575302 2.1988071 2.8561252 2.1896429 2.3689129 2.9288265 2.9885959
## 17 18 19 20 21 22 23 24
## 2.5727301 3.0806822 2.4450423 2.7411503 2.9206498 2.6263432 1.8249841 2.8321409
## 25 26 27 28 29 30 31 32
## 2.3152216 2.5983658 2.7653759 2.8460100 2.9522501 2.4139196 3.0256263 2.5945111
## 33 34 35 36 37 38 39 40
## 2.1341392 2.8651240 2.1039013 2.6493327 3.0536093 2.6554711 2.3374806 2.9903674
## 41 42 43 44 45 46 47 48
## 2.8456079 2.8796304 2.9848345 2.6670027 2.1422520 2.7961461 2.4064860 3.0208061
## 49 50 51 52 53 54 55 56
## 2.8251714 2.3346357 2.4896697 2.4134562 2.9944463 2.9596379 2.9789918 3.0032836
## 57 58 59 60 61 62 63 64
## 1.7246040 2.6695152 3.0190598 2.8997648 1.9570221 3.0692893 2.8326715 2.6463591
## 65 66 67 68 69 70 71 72
## 2.8195410 2.2593296 2.1785410 2.6179989 2.9299743 3.0008578 2.9038180 2.5449859
## 73 74 75 76 77 78 79 80
## 2.2047994 2.4352066 2.8727713 2.1160862 1.6934399 2.7013646 1.6132251 2.4461243
## 81 82 83 84 85 86 87 88
## 2.5335753 2.5988946 2.4796396 2.6007880 2.9939141 2.8005456 2.5349764 2.7587934
## 89 90 91 92 93 94 95 96
## 2.5895668 2.7858525 2.4082078 1.7047824 2.9580148 3.0272605 2.5206186 2.8440674
## 97 98 99 100 101 102 103 104
## 2.4838112 2.7977071 3.1029519 2.8306696 2.5837191 3.0887455 2.8049491 2.7624942
## 105 106 107 108 109 110 111 112
## 2.9659423 2.8617577 1.9742500 1.9730024 1.3094819 2.9432121 2.7023645 3.0086778
## 113 114 115 116 117 118 119 120
## 2.6940871 2.8296169 2.6575327 2.5908070 2.6259527 1.9744806 2.7872802 1.9522578
## 121 122 123 124 125 126 127 128
## 2.7592592 2.0094358 2.4825581 2.7453889 2.9721111 2.4253389 1.8142776 1.8667831
## 129 130 131 132 133 134 135 136
## 3.0030975 2.3005735 0.9487481 2.5817315 1.7325039 2.9582282 2.3626608 2.4723271
## 137 138 139 140 141 142 143 144
## 2.2130314 3.0096899 2.3243864 2.9121919 2.4256311 2.9307025 2.9555274 2.3614510
## 145 146 147 148 149 150 151 152
## 2.5073873 2.2791948 2.6810598 3.0688516 2.3614113 2.3373322 2.8717211 2.4854504
## 153 154 155 156 157 158 159 160
## 2.8360862 2.9019017 2.7942107 1.3285349 2.7118970 2.2669503 1.9451971 2.6581510
## 161 162 163 164 165 166 167 168
## 2.7485449 2.6409375 2.7774304 2.8513357 2.5523197 2.5708977 2.0898339 2.5788240
## 169 170 171 172 173 174 175 176
## 2.8854195 2.8047749 2.2224083 2.7646887 1.9964778 2.5558455 2.5302549 3.1134259
## 177 178 179 180 181 182 183 184
## 2.9714824 2.5883092 2.5557799 2.6157468 2.3607577 2.6086938 2.1411221 3.1076267
## 185 186 187 188 189 190 191 192
## 2.9155588 2.9826523 2.3034703 2.8679274 2.8511793 1.8870548 2.3704583 2.3409216
## 193 194 195 196 197 198 199 200
## 1.6698963 2.8760129 2.8112953 1.9243518 2.2817834 2.6139252 3.0970805 2.6986382
# In-sample fitted values of m1, for comparison with the predictions above.
fitted(m1)
## 1 2 3 4 5 6 7 8
## 3.0032021 2.4276993 2.1290192 2.8716735 2.6750109 1.9026841 2.4745431 2.6252954
## 9 10 11 12 13 14 15 16
## 1.2843731 2.4575302 2.1988071 2.8561252 2.1896429 2.3689129 2.9288265 2.9885959
## 17 18 19 20 21 22 23 24
## 2.5727301 3.0806822 2.4450423 2.7411503 2.9206498 2.6263432 1.8249841 2.8321409
## 25 26 27 28 29 30 31 32
## 2.3152216 2.5983658 2.7653759 2.8460100 2.9522501 2.4139196 3.0256263 2.5945111
## 33 34 35 36 37 38 39 40
## 2.1341392 2.8651240 2.1039013 2.6493327 3.0536093 2.6554711 2.3374806 2.9903674
## 41 42 43 44 45 46 47 48
## 2.8456079 2.8796304 2.9848345 2.6670027 2.1422520 2.7961461 2.4064860 3.0208061
## 49 50 51 52 53 54 55 56
## 2.8251714 2.3346357 2.4896697 2.4134562 2.9944463 2.9596379 2.9789918 3.0032836
## 57 58 59 60 61 62 63 64
## 1.7246040 2.6695152 3.0190598 2.8997648 1.9570221 3.0692893 2.8326715 2.6463591
## 65 66 67 68 69 70 71 72
## 2.8195410 2.2593296 2.1785410 2.6179989 2.9299743 3.0008578 2.9038180 2.5449859
## 73 74 75 76 77 78 79 80
## 2.2047994 2.4352066 2.8727713 2.1160862 1.6934399 2.7013646 1.6132251 2.4461243
## 81 82 83 84 85 86 87 88
## 2.5335753 2.5988946 2.4796396 2.6007880 2.9939141 2.8005456 2.5349764 2.7587934
## 89 90 91 92 93 94 95 96
## 2.5895668 2.7858525 2.4082078 1.7047824 2.9580148 3.0272605 2.5206186 2.8440674
## 97 98 99 100 101 102 103 104
## 2.4838112 2.7977071 3.1029519 2.8306696 2.5837191 3.0887455 2.8049491 2.7624942
## 105 106 107 108 109 110 111 112
## 2.9659423 2.8617577 1.9742500 1.9730024 1.3094819 2.9432121 2.7023645 3.0086778
## 113 114 115 116 117 118 119 120
## 2.6940871 2.8296169 2.6575327 2.5908070 2.6259527 1.9744806 2.7872802 1.9522578
## 121 122 123 124 125 126 127 128
## 2.7592592 2.0094358 2.4825581 2.7453889 2.9721111 2.4253389 1.8142776 1.8667831
## 129 130 131 132 133 134 135 136
## 3.0030975 2.3005735 0.9487481 2.5817315 1.7325039 2.9582282 2.3626608 2.4723271
## 137 138 139 140 141 142 143 144
## 2.2130314 3.0096899 2.3243864 2.9121919 2.4256311 2.9307025 2.9555274 2.3614510
## 145 146 147 148 149 150 151 152
## 2.5073873 2.2791948 2.6810598 3.0688516 2.3614113 2.3373322 2.8717211 2.4854504
## 153 154 155 156 157 158 159 160
## 2.8360862 2.9019017 2.7942107 1.3285349 2.7118970 2.2669503 1.9451971 2.6581510
## 161 162 163 164 165 166 167 168
## 2.7485449 2.6409375 2.7774304 2.8513357 2.5523197 2.5708977 2.0898339 2.5788240
## 169 170 171 172 173 174 175 176
## 2.8854195 2.8047749 2.2224083 2.7646887 1.9964778 2.5558455 2.5302549 3.1134259
## 177 178 179 180 181 182 183 184
## 2.9714824 2.5883092 2.5557799 2.6157468 2.3607577 2.6086938 2.1411221 3.1076267
## 185 186 187 188 189 190 191 192
## 2.9155588 2.9826523 2.3034703 2.8679274 2.8511793 1.8870548 2.3704583 2.3409216
## 193 194 195 196 197 198 199 200
## 1.6698963 2.8760129 2.8112953 1.9243518 2.2817834 2.6139252 3.0970805 2.6986382
#c)
library(readr)
# Reload the advertising data from the CSV file.
Advertising <- read_csv("Advertising.csv")
## New names:
## Rows: 200 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (5): ...1, TV, radio, newspaper, sales
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Single new observation: TV = 100, radio = 40, newspaper = 24.2.
predictors <- data.frame(TV = 100, radio = 40, newspaper = 24.2)

# Point forecast with a prediction interval. The interval type is spelled out
# in full ("prediction") instead of relying on partial matching of "predict".
# m1's response is log(sales), so fit/lwr/upr are on the log scale; apply
# exp() to express them in the units of sales.
predict(m1, newdata = predictors, interval = "prediction")
## fit lwr upr
## 1 2.70877 2.512999 2.90454