- The dataset prostate comes from a study on 97 men with prostate cancer who were due to receive a radical prostatectomy. Fit a model with lpsa as the response and l cavol as the predictor. Record the residual standard error and the R2. Now add lweight, svi, lpph, age, lcp, pgg45 and gleason to the model one at a time. For each model record the residual standard error and the R2. Plot the trends in these two statistics.
library(faraway)
data("prostate")
predictors<-colnames(prostate)
predictors<-predictors[predictors!="lpsa"]
predictors
## [1] "lcavol" "lweight" "age" "lbph" "svi" "lcp" "gleason"
## [8] "pgg45"
plot(lpsa~lcavol,prostate)

r<-vector()
s<-vector()
reg<-function(data,independent,predictor,r,s) {
m<-lm(as.formula(paste(independent,"~",paste(predictor,collapse ="+"))),data)
print(summary(m))
r<-c(r,summary(m)$r.squared)
s<-c(s,(summary(m)$sigma))
return(list(r,s))
}
for(i in 1:length(predictors)) {
out<-reg(prostate,"lpsa",head(predictors,i),r,s)
r<-out[[1]]
s<-out[[2]]
}
##
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor,
## collapse = "+"))), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.67625 -0.41648 0.09859 0.50709 1.89673
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.50730 0.12194 12.36 <2e-16 ***
## lcavol 0.71932 0.06819 10.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7875 on 95 degrees of freedom
## Multiple R-squared: 0.5394, Adjusted R-squared: 0.5346
## F-statistic: 111.3 on 1 and 95 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor,
## collapse = "+"))), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.61965 -0.50778 -0.02095 0.52291 1.89885
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.30262 0.56904 -0.532 0.59612
## lcavol 0.67753 0.06626 10.225 < 2e-16 ***
## lweight 0.51095 0.15726 3.249 0.00161 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7506 on 94 degrees of freedom
## Multiple R-squared: 0.5859, Adjusted R-squared: 0.5771
## F-statistic: 66.51 on 2 and 94 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor,
## collapse = "+"))), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.60894 -0.44897 -0.02805 0.45602 1.91756
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.146917 0.772374 0.190 0.84956
## lcavol 0.687819 0.067418 10.202 < 2e-16 ***
## lweight 0.549941 0.163838 3.357 0.00114 **
## age -0.009486 0.011003 -0.862 0.39082
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7517 on 93 degrees of freedom
## Multiple R-squared: 0.5892, Adjusted R-squared: 0.576
## F-statistic: 44.47 on 3 and 93 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor,
## collapse = "+"))), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4885 -0.4241 -0.0001 0.4031 1.8073
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.73074 0.87703 0.833 0.4069
## lcavol 0.69854 0.06754 10.343 <2e-16 ***
## lweight 0.45770 0.17617 2.598 0.0109 *
## age -0.01371 0.01137 -1.206 0.2309
## lbph 0.08404 0.06080 1.382 0.1702
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.748 on 92 degrees of freedom
## Multiple R-squared: 0.5976, Adjusted R-squared: 0.5801
## F-statistic: 34.15 on 4 and 92 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor,
## collapse = "+"))), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83505 -0.39396 0.00414 0.46336 1.57888
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.95100 0.83175 1.143 0.255882
## lcavol 0.56561 0.07459 7.583 2.77e-11 ***
## lweight 0.42369 0.16687 2.539 0.012814 *
## age -0.01489 0.01075 -1.385 0.169528
## lbph 0.11184 0.05805 1.927 0.057160 .
## svi 0.72095 0.20902 3.449 0.000854 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7073 on 91 degrees of freedom
## Multiple R-squared: 0.6441, Adjusted R-squared: 0.6245
## F-statistic: 32.94 on 5 and 91 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor,
## collapse = "+"))), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.82853 -0.40741 0.01695 0.47159 1.59040
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.93487 0.83577 1.119 0.26630
## lcavol 0.58765 0.08663 6.783 1.2e-09 ***
## lweight 0.41808 0.16792 2.490 0.01462 *
## age -0.01511 0.01081 -1.398 0.16546
## lbph 0.11381 0.05842 1.948 0.05452 .
## svi 0.78256 0.24261 3.226 0.00175 **
## lcp -0.04118 0.08135 -0.506 0.61392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7102 on 90 degrees of freedom
## Multiple R-squared: 0.6451, Adjusted R-squared: 0.6215
## F-statistic: 27.27 on 6 and 90 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor,
## collapse = "+"))), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.78803 -0.36933 0.00302 0.43436 1.62160
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.02416 1.13313 0.021 0.98304
## lcavol 0.57471 0.08712 6.597 2.92e-09 ***
## lweight 0.45260 0.17005 2.662 0.00923 **
## age -0.01812 0.01108 -1.636 0.10542
## lbph 0.10886 0.05844 1.863 0.06579 .
## svi 0.79783 0.24241 3.291 0.00143 **
## lcp -0.07488 0.08599 -0.871 0.38619
## gleason 0.14591 0.12292 1.187 0.23837
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7086 on 89 degrees of freedom
## Multiple R-squared: 0.6506, Adjusted R-squared: 0.6232
## F-statistic: 23.68 on 7 and 89 DF, p-value: < 2.2e-16
##
##
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor,
## collapse = "+"))), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7331 -0.3713 -0.0170 0.4141 1.6381
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.669337 1.296387 0.516 0.60693
## lcavol 0.587022 0.087920 6.677 2.11e-09 ***
## lweight 0.454467 0.170012 2.673 0.00896 **
## age -0.019637 0.011173 -1.758 0.08229 .
## lbph 0.107054 0.058449 1.832 0.07040 .
## svi 0.766157 0.244309 3.136 0.00233 **
## lcp -0.105474 0.091013 -1.159 0.24964
## gleason 0.045142 0.157465 0.287 0.77503
## pgg45 0.004525 0.004421 1.024 0.30886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7084 on 88 degrees of freedom
## Multiple R-squared: 0.6548, Adjusted R-squared: 0.6234
## F-statistic: 20.86 on 8 and 88 DF, p-value: < 2.2e-16
r
## [1] 0.5394319 0.5859345 0.5892177 0.5975750 0.6441024 0.6451130 0.6506440
## [8] 0.6547541
s
## [1] 0.7874994 0.7506469 0.7516739 0.7480208 0.7073054 0.7102135 0.7086050
## [8] 0.7084155
plot(r,col="red")
par(new=TRUE)
plot(s,col="green")
