1. The dataset prostate comes from a study on 97 men with prostate cancer who were due to receive a radical prostatectomy. Fit a model with lpsa as the response and l cavol as the predictor. Record the residual standard error and the R2. Now add lweight, svi, lpph, age, lcp, pgg45 and gleason to the model one at a time. For each model record the residual standard error and the R2. Plot the trends in these two statistics.
library(faraway)
data("prostate")
predictors<-colnames(prostate)
predictors<-predictors[predictors!="lpsa"]
predictors
## [1] "lcavol"  "lweight" "age"     "lbph"    "svi"     "lcp"     "gleason"
## [8] "pgg45"
plot(lpsa~lcavol,prostate)

r<-vector()
s<-vector()

reg<-function(data,independent,predictor,r,s) {
  m<-lm(as.formula(paste(independent,"~",paste(predictor,collapse ="+"))),data)
  print(summary(m))
  r<-c(r,summary(m)$r.squared)
  s<-c(s,(summary(m)$sigma))
  return(list(r,s))
}

for(i in 1:length(predictors)) {
  out<-reg(prostate,"lpsa",head(predictors,i),r,s)
  r<-out[[1]]
  s<-out[[2]]
}
## 
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor, 
##     collapse = "+"))), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.67625 -0.41648  0.09859  0.50709  1.89673 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.50730    0.12194   12.36   <2e-16 ***
## lcavol       0.71932    0.06819   10.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7875 on 95 degrees of freedom
## Multiple R-squared:  0.5394, Adjusted R-squared:  0.5346 
## F-statistic: 111.3 on 1 and 95 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor, 
##     collapse = "+"))), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.61965 -0.50778 -0.02095  0.52291  1.89885 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.30262    0.56904  -0.532  0.59612    
## lcavol       0.67753    0.06626  10.225  < 2e-16 ***
## lweight      0.51095    0.15726   3.249  0.00161 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7506 on 94 degrees of freedom
## Multiple R-squared:  0.5859, Adjusted R-squared:  0.5771 
## F-statistic: 66.51 on 2 and 94 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor, 
##     collapse = "+"))), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.60894 -0.44897 -0.02805  0.45602  1.91756 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.146917   0.772374   0.190  0.84956    
## lcavol       0.687819   0.067418  10.202  < 2e-16 ***
## lweight      0.549941   0.163838   3.357  0.00114 ** 
## age         -0.009486   0.011003  -0.862  0.39082    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7517 on 93 degrees of freedom
## Multiple R-squared:  0.5892, Adjusted R-squared:  0.576 
## F-statistic: 44.47 on 3 and 93 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor, 
##     collapse = "+"))), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4885 -0.4241 -0.0001  0.4031  1.8073 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.73074    0.87703   0.833   0.4069    
## lcavol       0.69854    0.06754  10.343   <2e-16 ***
## lweight      0.45770    0.17617   2.598   0.0109 *  
## age         -0.01371    0.01137  -1.206   0.2309    
## lbph         0.08404    0.06080   1.382   0.1702    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.748 on 92 degrees of freedom
## Multiple R-squared:  0.5976, Adjusted R-squared:  0.5801 
## F-statistic: 34.15 on 4 and 92 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor, 
##     collapse = "+"))), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.83505 -0.39396  0.00414  0.46336  1.57888 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.95100    0.83175   1.143 0.255882    
## lcavol       0.56561    0.07459   7.583 2.77e-11 ***
## lweight      0.42369    0.16687   2.539 0.012814 *  
## age         -0.01489    0.01075  -1.385 0.169528    
## lbph         0.11184    0.05805   1.927 0.057160 .  
## svi          0.72095    0.20902   3.449 0.000854 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7073 on 91 degrees of freedom
## Multiple R-squared:  0.6441, Adjusted R-squared:  0.6245 
## F-statistic: 32.94 on 5 and 91 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor, 
##     collapse = "+"))), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.82853 -0.40741  0.01695  0.47159  1.59040 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.93487    0.83577   1.119  0.26630    
## lcavol       0.58765    0.08663   6.783  1.2e-09 ***
## lweight      0.41808    0.16792   2.490  0.01462 *  
## age         -0.01511    0.01081  -1.398  0.16546    
## lbph         0.11381    0.05842   1.948  0.05452 .  
## svi          0.78256    0.24261   3.226  0.00175 ** 
## lcp         -0.04118    0.08135  -0.506  0.61392    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7102 on 90 degrees of freedom
## Multiple R-squared:  0.6451, Adjusted R-squared:  0.6215 
## F-statistic: 27.27 on 6 and 90 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor, 
##     collapse = "+"))), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.78803 -0.36933  0.00302  0.43436  1.62160 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.02416    1.13313   0.021  0.98304    
## lcavol       0.57471    0.08712   6.597 2.92e-09 ***
## lweight      0.45260    0.17005   2.662  0.00923 ** 
## age         -0.01812    0.01108  -1.636  0.10542    
## lbph         0.10886    0.05844   1.863  0.06579 .  
## svi          0.79783    0.24241   3.291  0.00143 ** 
## lcp         -0.07488    0.08599  -0.871  0.38619    
## gleason      0.14591    0.12292   1.187  0.23837    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7086 on 89 degrees of freedom
## Multiple R-squared:  0.6506, Adjusted R-squared:  0.6232 
## F-statistic: 23.68 on 7 and 89 DF,  p-value: < 2.2e-16
## 
## 
## Call:
## lm(formula = as.formula(paste(independent, "~", paste(predictor, 
##     collapse = "+"))), data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7331 -0.3713 -0.0170  0.4141  1.6381 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.669337   1.296387   0.516  0.60693    
## lcavol       0.587022   0.087920   6.677 2.11e-09 ***
## lweight      0.454467   0.170012   2.673  0.00896 ** 
## age         -0.019637   0.011173  -1.758  0.08229 .  
## lbph         0.107054   0.058449   1.832  0.07040 .  
## svi          0.766157   0.244309   3.136  0.00233 ** 
## lcp         -0.105474   0.091013  -1.159  0.24964    
## gleason      0.045142   0.157465   0.287  0.77503    
## pgg45        0.004525   0.004421   1.024  0.30886    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7084 on 88 degrees of freedom
## Multiple R-squared:  0.6548, Adjusted R-squared:  0.6234 
## F-statistic: 20.86 on 8 and 88 DF,  p-value: < 2.2e-16
r
## [1] 0.5394319 0.5859345 0.5892177 0.5975750 0.6441024 0.6451130 0.6506440
## [8] 0.6547541
s
## [1] 0.7874994 0.7506469 0.7516739 0.7480208 0.7073054 0.7102135 0.7086050
## [8] 0.7084155
plot(r,col="red")
par(new=TRUE)
plot(s,col="green")