library(MASS)
library(olsrr)
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:MASS':
##
## cement
## The following object is masked from 'package:datasets':
##
## rivers
library(leaps)
library(DAAG)
## Loading required package: lattice
##
## Attaching package: 'DAAG'
## The following object is masked from 'package:MASS':
##
## hills
Data=read.table("http://users.stat.ufl.edu/~rrandles/sta4210/Rclassnotes/data/textdatasets/KutnerData/Chapter%20%209%20Data%20Sets/CH09PR10.txt")
names(Data) = c("y", "x1", "x2", "x3", "x4") #y-Job Proficiency x1-x4-Test number
n=nrow(Data)
cor(Data)
## y x1 x2 x3 x4
## y 1.0000000 0.5144107 0.4970057 0.8970645 0.8693865
## x1 0.5144107 1.0000000 0.1022689 0.1807692 0.3266632
## x2 0.4970057 0.1022689 1.0000000 0.5190448 0.3967101
## x3 0.8970645 0.1807692 0.5190448 1.0000000 0.7820385
## x4 0.8693865 0.3266632 0.3967101 0.7820385 1.0000000
plot(Data)
It appears that all 4 predictors are linearly associated with y. From both outputs, we might suspect some multicollinearity between x2 and x3 (correlation of .519) and a more serious multicollinearity problem between x3 and x4 (correlation of .782); the scatterplot matrix shows the same pattern.
fit_f = lm(y~x1+x2+x3+x4,data=Data)
summary(fit_f)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4, data = Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9779 -3.4506 0.0941 2.4749 5.9959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -124.38182 9.94106 -12.512 6.48e-11 ***
## x1 0.29573 0.04397 6.725 1.52e-06 ***
## x2 0.04829 0.05662 0.853 0.40383
## x3 1.30601 0.16409 7.959 1.26e-07 ***
## x4 0.51982 0.13194 3.940 0.00081 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.099 on 20 degrees of freedom
## Multiple R-squared: 0.9629, Adjusted R-squared: 0.9555
## F-statistic: 129.7 on 4 and 20 DF, p-value: 5.262e-14
From the summary of the full model, I suspect that we can drop test 2 (x2) and keep the rest, since x2 is the only predictor that is not significant.
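To back up the multicollinearity suspicion from the correlation matrix, we could also look at variance inflation factors; a minimal sketch using olsrr (already loaded), output not shown here:
# Tolerance and VIF for each predictor in the full model; VIFs well above 10
# (tolerances near 0) would flag serious multicollinearity
ols_vif_tol(fit_f)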
#All possible combinations
fit_n = lm(y~1,data=Data)
fit_x1 = lm(y~x1, data=Data)
fit_x2 = lm(y~x2, data=Data)
fit_x3 = lm(y~x3, data=Data)
fit_x4 = lm(y~x4, data=Data)
fit_x1x2 = lm(y~x1+x2, data=Data)
fit_x1x3 = lm(y~x1+x3, data=Data)
fit_x1x4 = lm(y~x1+x4, data=Data)
fit_x2x3 = lm(y~x2+x3, data=Data)
fit_x2x4 = lm(y~x2+x4, data=Data)
fit_x3x4 = lm(y~x3+x4, data=Data)
fit_x1x2x3 = lm(y~x1+x2+x3, data=Data)
fit_x1x2x4 = lm(y~x1+x2+x4, data=Data)
fit_x1x3x4 = lm(y~x1+x3+x4, data=Data)
fit_x2x3x4 = lm(y~x2+x3+x4, data=Data)
fit_f = lm(y~x1+x2+x3+x4,data=Data)
#1 variable
Rsq <- summary(fit_x1)$r.squared;Rsq
## [1] 0.2646184
AdjRsq <- summary(fit_x1)$adj.r.squared;AdjRsq
## [1] 0.2326452
Rsq <- summary(fit_x2)$r.squared;Rsq
## [1] 0.2470147
AdjRsq <- summary(fit_x2)$adj.r.squared;AdjRsq
## [1] 0.2142762
Rsq <- summary(fit_x3)$r.squared;Rsq
## [1] 0.8047247
AdjRsq <- summary(fit_x3)$adj.r.squared;AdjRsq
## [1] 0.7962344
Rsq <- summary(fit_x4)$r.squared;Rsq
## [1] 0.7558329
AdjRsq <- summary(fit_x4)$adj.r.squared;AdjRsq
## [1] 0.745217
So the best one-variable model is y~x3 (fit_x3).
2 Variables
AdjRsq <- summary(fit_x1x2)$adj.r.squared;AdjRsq
## [1] 0.4154853
AdjRsq <- summary(fit_x1x3)$adj.r.squared;AdjRsq
## [1] 0.9269043
AdjRsq <- summary(fit_x1x4)$adj.r.squared;AdjRsq
## [1] 0.7984716
AdjRsq <- summary(fit_x2x3)$adj.r.squared;AdjRsq
## [1] 0.7884436
AdjRsq <- summary(fit_x2x4)$adj.r.squared;AdjRsq
## [1] 0.7635916
AdjRsq <- summary(fit_x3x4)$adj.r.squared;AdjRsq
## [1] 0.8660988
So the best two-variable model is y~x1+x3 (fit_x1x3).
3 Variables
AdjRsq <- summary(fit_x1x2x3)$adj.r.squared;AdjRsq
## [1] 0.9246779
AdjRsq <- summary(fit_x1x2x4)$adj.r.squared;AdjRsq
## [1] 0.8232664
AdjRsq <- summary(fit_x1x3x4)$adj.r.squared;AdjRsq
## [1] 0.9560482
AdjRsq <- summary(fit_x2x3x4)$adj.r.squared;AdjRsq
## [1] 0.8616797
So the best three-variable model is y~x1+x3+x4 (fit_x1x3x4).
#Comparing the best model of each size: the null model (no predictors), 1 variable, 2 variables, 3 variables, and all 4
AdjRsq <- summary(fit_n)$adj.r.squared;AdjRsq
## [1] 0
AdjRsq <- summary(fit_x3)$adj.r.squared;AdjRsq
## [1] 0.7962344
AdjRsq <- summary(fit_x1x3)$adj.r.squared;AdjRsq
## [1] 0.9269043
AdjRsq <- summary(fit_x1x3x4)$adj.r.squared;AdjRsq
## [1] 0.9560482
AdjRsq <- summary(fit_f)$adj.r.squared;AdjRsq
## [1] 0.9554702
The highest adjusted \(R^2\) is .956, which comes from the model with tests 1, 3, and 4. It appears that test 2 should be dropped from the model, as I suspected at the start.
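The same best-subsets comparison can be cross-checked with the leaps package loaded at the top; a sketch (output not reproduced here):
# Exhaustive search for the best subset of each size, then the criteria per size
subs <- regsubsets(y ~ x1 + x2 + x3 + x4, data = Data, nvmax = 4)
subs_sum <- summary(subs)
subs_sum$which    # which predictors appear in the best model of each size
subs_sum$adjr2    # adjusted R^2 for those models
subs_sum$cp       # Mallows' Cp for those models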
Doing the same thing, but in a much quicker way:
ols_step_all_possible(fit_f) # R-square, adjusted R-square, and Mallows' Cp for every subset
## # A tibble: 15 x 6
## Index N Predictors `R-Square` `Adj. R-Square` `Mallow's Cp`
## * <int> <int> <chr> <dbl> <dbl> <dbl>
## 1 1 1 x3 0.805 0.796 84.2
## 2 2 1 x4 0.756 0.745 111.
## 3 3 1 x1 0.265 0.233 375.
## 4 4 1 x2 0.247 0.214 385.
## 5 5 2 x1 x3 0.933 0.927 17.1
## 6 6 2 x3 x4 0.877 0.866 47.2
## 7 7 2 x1 x4 0.815 0.798 80.6
## 8 8 2 x2 x3 0.806 0.788 85.5
## 9 9 2 x2 x4 0.783 0.764 97.8
## 10 10 2 x1 x2 0.464 0.415 270.
## 11 11 3 x1 x3 x4 0.962 0.956 3.73
## 12 12 3 x1 x2 x3 0.934 0.925 18.5
## 13 13 3 x2 x3 x4 0.879 0.862 48.2
## 14 14 3 x1 x2 x4 0.845 0.823 66.3
## 15 15 4 x1 x2 x3 x4 0.963 0.955 5
k <- ols_step_all_possible(fit_f)
plot(k)
As I showed, using adjusted \(R^2\) (we are maximizing this), the best models are:
1 variable: y~x3
2 variables: y~x1+x3
3 variables: y~x1+x3+x4
Using Mallow's Cp (we are minimizing this and want \(C_p \approx p\), where p counts the parameters including the intercept):
Best 1 variable, y~x3: \(C_2\) = 84.25 (too high for p = 2)
Best 2 variables, y~x1+x3: \(C_3\) = 17.11 (too high)
Best 3 variables, y~x1+x3+x4: \(C_4\) = 3.727 \(\approx\) 4, so good!
All 4 variables, y~x1+x2+x3+x4: \(C_5\) = 5
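As a sanity check on these values, Mallows' Cp can be computed directly from a candidate model's SSE and the full model's MSE; a short sketch for the x1, x3, x4 model, which should reproduce the 3.73 in the table above:
# Cp = SSE_p / MSE_full - (n - 2p), with p = 4 parameters (intercept + 3 slopes)
MSE_full <- sigma(fit_f)^2               # full-model MSE, about 16.80
SSE_p <- sum(resid(fit_x1x3x4)^2)        # candidate-model SSE, about 348.2
SSE_p / MSE_full - (n - 2*4)             # about 3.73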
attach(Data)
k <- as.data.frame(k)
sse1 = (1-k$rsquare)*var(y)*(n-1)      # SSE_p = (1 - R^2_p) * SSTO
aic1 <- k$aic-n*log(2*pi)-n-2          # textbook AIC_p = n*ln(SSE_p/n) + 2p (R's AIC minus its constants)
bic1 <- aic1 + (log(n)-2)*(k[,2]+1)    # SBC_p = n*ln(SSE_p/n) + p*ln(n), p = predictors + intercept
press1 <- (1-k$predrsq)*var(y)*(n-1)   # PRESS_p backed out of the predicted R^2
k1 <- cbind(k,aic1,bic1,press1,sse1)
k1
## mindex n predictors rsquare adjr predrsq cp aic
## 3 1 1 x3 0.8047247 0.7962344 0.7719685 84.246496 183.4155
## 4 2 1 x4 0.7558329 0.7452170 0.7185073 110.597414 189.0015
## 1 3 1 x1 0.2646184 0.2326452 0.1394302 375.344689 216.5649
## 2 4 1 x2 0.2470147 0.2142762 0.1173960 384.832454 217.1563
## 6 5 2 x1 x3 0.9329956 0.9269043 0.9159516 17.112978 158.6741
## 10 6 2 x3 x4 0.8772573 0.8660988 0.8398940 47.153985 173.8075
## 7 7 2 x1 x4 0.8152656 0.7984716 0.7669652 80.565307 184.0282
## 8 8 2 x2 x3 0.8060733 0.7884436 0.7562794 85.519650 185.2422
## 9 9 2 x2 x4 0.7832923 0.7635916 0.7247849 97.797790 188.0189
## 5 10 2 x1 x2 0.4641948 0.4154853 0.2882658 269.780029 210.6495
## 13 11 3 x1 x3 x4 0.9615422 0.9560482 0.9479289 3.727399 146.7942
## 11 12 3 x1 x2 x3 0.9340931 0.9246779 0.9082006 18.521465 160.2613
## 14 13 3 x2 x3 x4 0.8789698 0.8616797 0.8265340 48.231020 175.4562
## 12 14 3 x1 x2 x4 0.8453581 0.8232664 0.7917114 66.346500 181.5830
## 15 15 4 x1 x2 x3 x4 0.9628918 0.9554702 0.9426785 5.000000 147.9011
## sbic sbc msep fpe apc hsp aic1
## 3 108.25599 187.0721 83.57926 83.02020 0.22923626 3.4941163 110.46853
## 4 113.50220 192.6581 104.50528 103.80625 0.28663091 4.3689499 116.05459
## 1 140.11466 220.2216 314.74869 312.64335 0.86327410 13.1583900 143.61801
## 2 140.69464 220.8130 322.28319 320.12745 0.88393929 13.4733777 144.20941
## 6 85.69145 163.5496 31.40962 30.88438 0.08527827 1.3131114 85.72721
## 10 98.35397 178.6830 57.53809 56.57591 0.15621801 2.4054385 100.86053
## 7 107.46607 188.9037 86.59792 85.14979 0.23511651 3.6203144 111.08125
## 8 108.57275 190.1177 90.90701 89.38683 0.24681584 3.8004603 112.29528
## 9 111.12083 192.8944 101.58604 99.88728 0.27580981 4.2469082 115.07201
## 5 132.55326 215.5250 251.16932 246.96916 0.68193384 10.5003895 137.70254
## 13 77.41140 152.8886 19.83065 19.23374 0.05310840 0.8290405 73.84732
## 11 86.76926 166.3556 33.98466 32.96171 0.09101423 1.4207635 87.31433
## 14 98.81455 181.5506 62.40886 60.53034 0.16713700 2.6090663 102.50928
## 12 104.05556 187.6774 79.74060 77.34038 0.21355306 3.3336370 108.63607
## 15 79.32921 155.2144 21.14890 20.15865 0.05566228 0.8841514 74.95421
## bic1 press1 sse1
## 3 112.90629 2064.5976 1768.0228
## 4 118.49234 2548.6349 2210.6887
## 1 146.05576 7791.5994 6658.1453
## 2 146.64717 7991.0964 6817.5291
## 6 89.38384 760.9744 606.6574
## 10 104.51716 1449.6001 1111.3126
## 7 114.73788 2109.8967 1672.5853
## 8 115.95191 2206.6460 1755.8127
## 9 118.72864 2491.7979 1962.0716
## 5 141.35916 6444.0411 4851.1799
## 13 78.72282 471.4520 348.1970
## 11 92.18984 831.1521 596.7207
## 14 107.38479 1570.5610 1095.8078
## 12 113.51157 1885.8454 1400.1275
## 15 81.04859 518.9885 335.9775
We want to minimize AIC:
Best 1 variable, y~x3: AIC = 183.4 / AIC1 = 110.47
Best 2 variables, y~x1+x3: AIC = 158.67 / AIC1 = 85.72
Best 3 variables, y~x1+x3+x4: AIC = 146.79 / AIC1 = 73.847
All 4 variables, y~x1+x2+x3+x4: AIC = 147.9 / AIC1 = 74.95
We want to minimize BIC:
Best 1 variable, y~x3: BIC = 112.91
Best 2 variables, y~x1+x3: BIC = 89.38
Best 3 variables, y~x1+x3+x4: BIC = 78.72
All 4 variables, y~x1+x2+x3+x4: BIC = 81.05
We want to minimize PRESS:
Best 1 variable, y~x3: PRESS = 2064.5976
Best 2 variables, y~x1+x3: PRESS = 760.9744
Best 3 variables, y~x1+x3+x4: PRESS = 471.4520
All 4 variables, y~x1+x2+x3+x4: PRESS = 518.9885
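The aic1 and bic1 columns above correspond to the textbook criteria \(AIC_p = n\ln(SSE_p/n) + 2p\) and \(SBC_p = n\ln(SSE_p/n) + p\ln(n)\); a quick hand check for the one-variable x3 model, reusing objects defined earlier:
# Textbook AIC and SBC for y ~ x3, where p = 2 parameters (intercept + slope)
SSE_x3 <- sum(resid(fit_x3)^2)    # about 1768
n*log(SSE_x3/n) + 2*2             # AIC_2, about 110.47 (matches aic1)
n*log(SSE_x3/n) + 2*log(n)        # SBC_2, about 112.91 (matches bic1)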
ols_step_forward_p(fit_f,pent=.05)
## Forward Selection Method
## ---------------------------
##
## Candidate Terms:
##
## 1. x1
## 2. x2
## 3. x3
## 4. x4
##
## We are selecting variables based on p value...
##
## Variables Entered:
##
## - x3
## - x1
## - x4
##
## No more variables to be added.
##
## Final Model Output
## ------------------
##
## Model Summary
## --------------------------------------------------------------
## R 0.981 RMSE 4.072
## R-Squared 0.962 Coef. Var 4.416
## Adj. R-Squared 0.956 MSE 16.581
## Pred R-Squared 0.948 MAE 3.096
## --------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
##
## ANOVA
## ---------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## ---------------------------------------------------------------------
## Regression 8705.803 3 2901.934 175.018 0.0000
## Residual 348.197 21 16.581
## Total 9054.000 24
## ---------------------------------------------------------------------
##
## Parameter Estimates
## -----------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## -----------------------------------------------------------------------------------------------
## (Intercept) -124.200 9.874 -12.578 0.000 -144.734 -103.666
## x3 1.357 0.152 0.619 8.937 0.000 1.041 1.673
## x1 0.296 0.044 0.310 6.784 0.000 0.205 0.387
## x4 0.517 0.131 0.284 3.948 0.001 0.245 0.790
## -----------------------------------------------------------------------------------------------
##
## Selection Summary
## -------------------------------------------------------------------------
## Variable Adj.
## Step Entered R-Square R-Square C(p) AIC RMSE
## -------------------------------------------------------------------------
## 1 x3 0.8047 0.7962 84.2465 183.4155 8.7676
## 2 x1 0.9330 0.9269 17.1130 158.6741 5.2512
## 3 x4 0.9615 0.9560 3.7274 146.7942 4.0720
## -------------------------------------------------------------------------
ols_step_forward_p(fit_f,pent=.1)
## Forward Selection Method
## ---------------------------
##
## Candidate Terms:
##
## 1. x1
## 2. x2
## 3. x3
## 4. x4
##
## We are selecting variables based on p value...
##
## Variables Entered:
##
## - x3
## - x1
## - x4
##
## No more variables to be added.
##
## Final Model Output
## ------------------
##
## Model Summary
## --------------------------------------------------------------
## R 0.981 RMSE 4.072
## R-Squared 0.962 Coef. Var 4.416
## Adj. R-Squared 0.956 MSE 16.581
## Pred R-Squared 0.948 MAE 3.096
## --------------------------------------------------------------
## RMSE: Root Mean Square Error
## MSE: Mean Square Error
## MAE: Mean Absolute Error
##
## ANOVA
## ---------------------------------------------------------------------
## Sum of
## Squares DF Mean Square F Sig.
## ---------------------------------------------------------------------
## Regression 8705.803 3 2901.934 175.018 0.0000
## Residual 348.197 21 16.581
## Total 9054.000 24
## ---------------------------------------------------------------------
##
## Parameter Estimates
## -----------------------------------------------------------------------------------------------
## model Beta Std. Error Std. Beta t Sig lower upper
## -----------------------------------------------------------------------------------------------
## (Intercept) -124.200 9.874 -12.578 0.000 -144.734 -103.666
## x3 1.357 0.152 0.619 8.937 0.000 1.041 1.673
## x1 0.296 0.044 0.310 6.784 0.000 0.205 0.387
## x4 0.517 0.131 0.284 3.948 0.001 0.245 0.790
## -----------------------------------------------------------------------------------------------
##
## Selection Summary
## -------------------------------------------------------------------------
## Variable Adj.
## Step Entered R-Square R-Square C(p) AIC RMSE
## -------------------------------------------------------------------------
## 1 x3 0.8047 0.7962 84.2465 183.4155 8.7676
## 2 x1 0.9330 0.9269 17.1130 158.6741 5.2512
## 3 x4 0.9615 0.9560 3.7274 146.7942 4.0720
## -------------------------------------------------------------------------
Final Model: \[\widehat{\text{Job Proficiency}} = -124.2 + 1.357\,\text{Test3} + 0.296\,\text{Test1} + 0.517\,\text{Test4}\]
\[\hat{Y} = -124.2 + 1.357x_3 + 0.296x_1 + 0.517x_4\]
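For illustration, the final model can be used to predict proficiency for a new applicant; the test scores below are made up purely to show the mechanics of predict():
# Hypothetical applicant scores (not from the data) fed to the final model
new_applicant <- data.frame(x1 = 100, x3 = 100, x4 = 100)
predict(fit_x1x3x4, newdata = new_applicant, interval = "prediction", level = 0.95)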
ols_step_backward_aic(fit_f)
## Backward Elimination Method
## ---------------------------
##
## Candidate Terms:
##
## 1 . x1
## 2 . x2
## 3 . x3
## 4 . x4
##
##
## Variables Removed:
##
## - x2
##
## No more variables to be removed.
##
##
## Backward Elimination Summary
## --------------------------------------------------------------------
## Variable AIC RSS Sum Sq R-Sq Adj. R-Sq
## --------------------------------------------------------------------
## Full Model 147.901 335.978 8718.022 0.96289 0.95547
## x2 146.794 348.197 8705.803 0.96154 0.95605
## --------------------------------------------------------------------
This yields the same model as part (c).
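As one more cross-check, olsrr also provides a stepwise (both-directions) p-value search; a sketch using the defaults, output not shown:
# Stepwise selection letting variables both enter and leave; with these data it
# should again settle on x1, x3, and x4
ols_step_both_p(fit_f)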
press(fit_x1x3x4)
## [1] 471.452
SSE <- sigma(fit_x1x3x4)^2*(n-4);SSE # residual df = n - 4 (intercept + 3 predictors)
## [1] 348.197
These values are not close (PRESS = 471.45 vs. SSE = 348.20), which suggests that MSE is NOT a valid indicator of the predictive ability of the fitted model.
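Another way to frame the comparison is per observation, since PRESS/n acts as a cross-validated analogue of MSE; a small sketch using values already computed:
# PRESS-based estimate of prediction error vs. the in-sample MSE
press(fit_x1x3x4)/n    # about 18.86
sigma(fit_x1x3x4)^2    # about 16.58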
Data2=read.table("http://users.stat.ufl.edu/~rrandles/sta4210/Rclassnotes/data/textdatasets/KutnerData/Chapter%20%209%20Data%20Sets/CH09PR22.txt")
names(Data2) = c("y", "x1", "x2", "x3", "x4") #y-Job Proficiency x1-x4-Test number
n=nrow(Data2)
cor(Data)
## y x1 x2 x3 x4
## y 1.0000000 0.5144107 0.4970057 0.8970645 0.8693865
## x1 0.5144107 1.0000000 0.1022689 0.1807692 0.3266632
## x2 0.4970057 0.1022689 1.0000000 0.5190448 0.3967101
## x3 0.8970645 0.1807692 0.5190448 1.0000000 0.7820385
## x4 0.8693865 0.3266632 0.3967101 0.7820385 1.0000000
cor(Data2)
## y x1 x2 x3 x4
## y 1.0000000 0.53707787 0.34477442 0.8880519 0.8879388
## x1 0.5370779 1.00000000 0.01057088 0.1772891 0.3196395
## x2 0.3447744 0.01057088 1.00000000 0.3437441 0.2207638
## x3 0.8880519 0.17728907 0.34374413 1.0000000 0.8714466
## x4 0.8879388 0.31963945 0.22076377 0.8714466 1.0000000
plot(Data)
plot(Data2)
Comparing the two correlation matrices, the correlations involving test 2 (x2) differ quite a bit between the data sets, but the other values are pretty close.
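A compact way to see which correlations shift between the two samples is to difference the matrices directly (a sketch):
# Entry-wise difference between the training and validation correlation matrices;
# the largest gaps involve x2
round(cor(Data) - cor(Data2), 3)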
fit_x1x3x4v = lm(y~x1+x3+x4, data=Data2)
fit_x1x3x4
##
## Call:
## lm(formula = y ~ x1 + x3 + x4, data = Data)
##
## Coefficients:
## (Intercept) x1 x3 x4
## -124.2000 0.2963 1.3570 0.5174
fit_x1x3x4v
##
## Call:
## lm(formula = y ~ x1 + x3 + x4, data = Data2)
##
## Coefficients:
## (Intercept) x1 x3 x4
## -122.7671 0.3124 1.4068 0.4284
The estimates for all 4 regression coefficients are relatively close for the training data and the validation data.
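The two sets of estimates can also be lined up directly (a sketch using the fits above):
# Side-by-side coefficients from the training fit and the validation fit
cbind(training = coef(fit_x1x3x4), validation = coef(fit_x1x3x4v))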
sigma(fit_x1x3x4)^2 # MSE
## [1] 16.58081
sigma(fit_x1x3x4v)^2 # MSE (validation)
## [1] 18.35493
summary(fit_x1x3x4)
##
## Call:
## lm(formula = y ~ x1 + x3 + x4, data = Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4579 -3.1563 -0.2057 1.8070 6.6083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -124.20002 9.87406 -12.578 3.04e-11 ***
## x1 0.29633 0.04368 6.784 1.04e-06 ***
## x3 1.35697 0.15183 8.937 1.33e-08 ***
## x4 0.51742 0.13105 3.948 0.000735 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.072 on 21 degrees of freedom
## Multiple R-squared: 0.9615, Adjusted R-squared: 0.956
## F-statistic: 175 on 3 and 21 DF, p-value: 5.16e-15
summary(fit_x1x3x4v)
##
## Call:
## lm(formula = y ~ x1 + x3 + x4, data = Data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.4619 -2.3836 0.6834 2.1123 7.2394
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -122.76705 11.84783 -10.362 1.04e-09 ***
## x1 0.31238 0.04729 6.605 1.54e-06 ***
## x3 1.40676 0.23262 6.048 5.31e-06 ***
## x4 0.42838 0.19749 2.169 0.0417 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.284 on 21 degrees of freedom
## Multiple R-squared: 0.9489, Adjusted R-squared: 0.9416
## F-statistic: 130 on 3 and 21 DF, p-value: 1.017e-13
The MSE is pretty close for both sets of data, and the \(R^2\) is pretty close for both models. From all of this, the estimates from the validation set appear reasonably similar to those from the training data set.
fitY_t1 <- predict(fit_x1x3x4, newdata = data.frame(x1=Data2$x1, x3=Data2$x3, x4=Data2$x4))
MSPR <-mean((Data2$y-fitY_t1)^2); MSPR #MSPR
## [1] 15.70972
sigma(fit_x1x3x4)^2 #MSE
## [1] 16.58081
MSPR (15.71) is close to MSE (16.58), so this does NOT show a substantial bias problem. However, this differs from our conclusion in 9.21.
total <- rbind(Data, Data2)
names(total) = c("y", "x1", "x2", "x3", "x4") #y-Job Proficiency x1-x4-Test number
n=nrow(total)
fit_x1x3x4t = lm(y~x1+x3+x4, data=total)
summary(fit_x1x3x4)
##
## Call:
## lm(formula = y ~ x1 + x3 + x4, data = Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4579 -3.1563 -0.2057 1.8070 6.6083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -124.20002 9.87406 -12.578 3.04e-11 ***
## x1 0.29633 0.04368 6.784 1.04e-06 ***
## x3 1.35697 0.15183 8.937 1.33e-08 ***
## x4 0.51742 0.13105 3.948 0.000735 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.072 on 21 degrees of freedom
## Multiple R-squared: 0.9615, Adjusted R-squared: 0.956
## F-statistic: 175 on 3 and 21 DF, p-value: 5.16e-15
summary(fit_x1x3x4t)
##
## Call:
## lm(formula = y ~ x1 + x3 + x4, data = total)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.7192 -2.7369 0.1278 2.0971 7.0657
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -123.44104 7.16508 -17.228 < 2e-16 ***
## x1 0.30364 0.03072 9.886 5.86e-13 ***
## x3 1.36906 0.12280 11.148 1.15e-14 ***
## x4 0.48735 0.10475 4.652 2.79e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.006 on 46 degrees of freedom
## Multiple R-squared: 0.9567, Adjusted R-squared: 0.9539
## F-statistic: 338.9 on 3 and 46 DF, p-value: < 2.2e-16
The standard errors of all the regression coefficients are smaller than in the training (model-building) data set, as expected since the combined data set has twice as many observations.
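To see the reduction explicitly, the standard errors from the two fits can be placed side by side (a sketch using the summaries above):
# Coefficient standard errors: training fit (n = 25) vs. combined fit (n = 50)
cbind(training = summary(fit_x1x3x4)$coefficients[, "Std. Error"],
      combined = summary(fit_x1x3x4t)$coefficients[, "Std. Error"])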