Example Wage

library(ISLR)
library(caret)

## Loading required package: lattice
## Loading required package: ggplot2

# Load the Wage data and drop logwage (presumably the log of the outcome
# `wage`, which would leak the target into the predictors -- TODO confirm).
data(Wage)
Wage <- subset(Wage, select = -c(logwage))

# Split the data in two stages:
#   1. 70% of all rows form the build set; the held-out 30% is the validation
#      set, which is only used for the final error estimate.
#   2. 70% of the build set is used for training; the rest is the testing set.
# inBuild holds the row indices that belong to the build set, so those rows go
# to buildData and the complement goes to validation.
inBuild <- createDataPartition(y = Wage$wage, p = 0.7, list = FALSE)
buildData <- Wage[inBuild, ]
validation <- Wage[-inBuild, ]
inTrain <- createDataPartition(y = buildData$wage, p = 0.7, list = FALSE)
training <- buildData[inTrain, ]
testing <- buildData[-inTrain, ]

Build two different models

# Fit a generalised linear model (caret method "glm") of wage on every other
# column of the training set.
mod1 <- train(
  wage ~ .,
  data = training,
  method = "glm"
)

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

#mod1 (fitted above) is the linear regression model
# Random forest on all predictors, resampled with 3-fold cross-validation.
# The resampling settings have to be supplied through `trControl` (argument
# names are case-sensitive) and the fold count belongs to trainControl();
# otherwise both are swallowed by `...` and train() falls back to its default
# resampling scheme.
mod2 <- train(
  wage ~ .,
  data = training,
  method = "rf",
  trControl = trainControl(method = "cv", number = 3)
)

## Loading required package: randomForest
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.

#mod2 (fitted above) is the random forest model

Predict on the testing set

# Score the testing rows with each of the two fitted models.
pred1 <- predict(mod1, newdata = testing)

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

pred2 <- predict(mod2, newdata = testing)

# Plot the two sets of predictions against each other, coloured by the
# observed wage.
qplot(x = pred1, y = pred2, data = testing, col = wage)

#Fit a model that combines predictors

# Put the two models' testing-set predictions next to the observed wage and
# fit a GAM (caret method "gam") that predicts wage from both of them.
predDF <- data.frame(
  pred1 = pred1,
  pred2 = pred2,
  wage = testing$wage
)
combModFit <- train(wage ~ ., data = predDF, method = "gam")

## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-6. For overview type 'help("mgcv-package")'.

# Predictions of the combined model on the same stacked data frame.
combPred <- predict(combModFit, newdata = predDF)

Testing errors

# Root of the summed squared errors on the testing set, one value per model.

# Linear model (mod1)
sqrt(sum((pred1 - testing$wage)^2))

## [1] 563.3893

# Random forest (mod2)
sqrt(sum((pred2 - testing$wage)^2))

## [1] 564.9766

# Combined GAM (combModFit)
sqrt(sum((combPred - testing$wage)^2))

## [1] 542.2817

Predict on validation data set

# Score the validation rows with both base models.
pred1V <- predict(mod1, newdata = validation)

## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading

pred2V <- predict(mod2, newdata = validation)

# The columns are named pred1/pred2 so that they match the predictor names
# combModFit was trained on.
predVDF <- data.frame(
  pred1 = pred1V,
  pred2 = pred2V
)
combPredV <- predict(combModFit, newdata = predVDF)

Validation errors

# Root of the summed squared errors on the validation set, using the same
# formula as for the testing errors. The residuals must be squared
# element-wise before they are summed; squaring the sum instead lets positive
# and negative errors cancel and just returns the absolute value of the summed
# residuals.

# Linear model (mod1)
sqrt(sum((pred1V - validation$wage)^2))

## (stale output removed: the recorded 1047.054 was produced by
## sqrt(sum(residuals)^2); re-run to obtain the corrected value)

# Random forest (mod2)
sqrt(sum((pred2V - validation$wage)^2))

## (stale output removed: the recorded 3819.623 was produced by
## sqrt(sum(residuals)^2); re-run to obtain the corrected value)

# Combined GAM (combModFit)
sqrt(sum((combPredV - validation$wage)^2))

## (stale output removed: the recorded 5722.396 was produced by
## sqrt(sum(residuals)^2); re-run to obtain the corrected value)

##On the test set the combined model has a lower error than either single model (542.3 vs. 563.4 and 565.0), i.e. combining the predictors improves accuracy there