Problem 1.

Bagging and boosting are both general-purpose ensemble approaches that can be used for regression and classification. Bagging creates multiple bootstrap copies of the original training data set, fits a separate, independent tree to each copy, and averages the resulting predictions; its main goal is to reduce variance. Boosting, on the other hand, learns slowly: trees are grown sequentially, each one fit to the residuals of the current model and therefore dependent on the trees that came before it; its main goal is to increase predictive power by steadily reducing bias.
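
As a quick illustration of the difference (not part of the homework data or code: the data below are simulated and the randomForest package is assumed to be installed), bagging can be run as a random forest that considers all predictors at each split, while boosting grows small trees sequentially with a shrinkage parameter:

# Illustration only: bagging vs. boosting on simulated data
library(randomForest)   # assumed installed; bagging = randomForest with mtry = number of predictors
library(gbm)

set.seed(1)
n  <- 200
x1 <- rnorm(n); x2 <- rnorm(n)
sim <- data.frame(y = 2*x1 - x2 + rnorm(n), x1 = x1, x2 = x2)

# Bagging: bootstrap samples, deep independent trees, predictions averaged
bag_fit <- randomForest(y ~ x1 + x2, data = sim, mtry = 2, ntree = 500)

# Boosting: shallow trees fit sequentially to the residuals, each scaled by the shrinkage (lambda)
boost_fit <- gbm(y ~ x1 + x2, data = sim, distribution = "gaussian",
                 n.trees = 500, interaction.depth = 1, shrinkage = 0.01)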

Problem 2.

In the code below, we fit several generalized boosted regression models with different shrinkage parameters and numbers of trees.

From the first simulation (100 trees), the purple line, with shrinkage parameter lambda = 0.2, has the lowest CV error. The pink line (lambda = 0.5) initially dips below it but comes back up after about 50 trees.

From the second simulation (500 trees), the blue line, with shrinkage parameter lambda = 0.1, dips below the purple and pink, while the purple and pink curves start to come back up.

From the third simulation (1000 trees), blue and purple (lambda = 0.1 and lambda = 0.2, respectively) compete for the lowest CV error, and green (lambda = 0.01) now drops below pink.

From the fourth simulation (100,000 trees), the most interesting one to me personally, the pink, blue, and purple curves all start bouncing back up. This is because those models are now fitting the noise, so increasing the number of trees no longer helps. Green also looks like it is starting to creep back up, but it stays closest to the minimum CV value.

Based on these four simulations, I built an MSE vs. lambda graph (training and test MSE over a grid of shrinkage values). From it, the lowest test MSE occurs near lambda = 0.01, which corresponds to the green curve. However, this plot uses 1000 trees, so it matches the conclusion we had reached before.

In general, what I have learned is that after a certain number of trees, adding more trees no longer reduces CV error because the GBM begins to fit the noise (the epsilon term). Based on the test-MSE curve, I would estimate the best lambda value to be around 0.02 or 0.025.

require(gbm)
## Loading required package: gbm
## Warning: package 'gbm' was built under R version 3.3.2
## Loading required package: survival
## Loading required package: lattice
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1.1
kajal_hw6<- read.csv("C:/Users/Kajal/Downloads/HW6.csv")

#SIMULATION 1

nt<-100

gbm1<-gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds = 3, shrinkage=0.001, n.trees=nt)
## Distribution not specified, assuming gaussian ...
gbm2<-gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds = 3, shrinkage=0.01, n.trees=nt)
## Distribution not specified, assuming gaussian ...
gbm3<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.1, n.trees=nt) 
## Distribution not specified, assuming gaussian ...
gbm4<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.2, n.trees=nt) 
## Distribution not specified, assuming gaussian ...
gbm5<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.5, n.trees=nt)  
## Distribution not specified, assuming gaussian ...
plot(gbm1$cv.error,ylim=c(0,8.5),col="red",type="l")
points(gbm2$cv.error,col="green",type="l")
points(gbm3$cv.error,col="blue",type="l")
points(gbm4$cv.error,col="purple",type="l")
points(gbm5$cv.error,col="pink",type="l")

#SIMULATION 2

nt<-500

gbm1<-gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds = 3, shrinkage=0.001, n.trees=nt)
## Distribution not specified, assuming gaussian ...
gbm2<-gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds = 3, shrinkage=0.01, n.trees=nt)
## Distribution not specified, assuming gaussian ...
gbm3<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.1, n.trees=nt) 
## Distribution not specified, assuming gaussian ...
gbm4<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.2, n.trees=nt) 
## Distribution not specified, assuming gaussian ...
gbm5<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.5, n.trees=nt)  
## Distribution not specified, assuming gaussian ...
plot(gbm1$cv.error,ylim=c(0,8.5),col="red",type="l")
points(gbm2$cv.error,col="green",type="l")
points(gbm3$cv.error,col="blue",type="l")
points(gbm4$cv.error,col="purple",type="l")
points(gbm5$cv.error,col="pink",type="l")

#SIMULATION 3

nt<-1000

gbm1<-gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds = 3, shrinkage=0.001, n.trees=nt)
## Distribution not specified, assuming gaussian ...
gbm2<-gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds = 3, shrinkage=0.01, n.trees=nt)
## Distribution not specified, assuming gaussian ...
gbm3<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.1, n.trees=nt) 
## Distribution not specified, assuming gaussian ...
gbm4<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.2, n.trees=nt) 
## Distribution not specified, assuming gaussian ...
gbm5<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.5, n.trees=nt)  
## Distribution not specified, assuming gaussian ...
plot(gbm1$cv.error,ylim=c(0,8.5),col="red",type="l")
points(gbm2$cv.error,col="green",type="l")
points(gbm3$cv.error,col="blue",type="l")
points(gbm4$cv.error,col="purple",type="l")
points(gbm5$cv.error,col="pink",type="l")

#SIMULATION 4

nt<-100000

gbm1<-gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds = 3, shrinkage=0.001, n.trees=nt)
## Distribution not specified, assuming gaussian ...
gbm2<-gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds = 3, shrinkage=0.01, n.trees=nt)
## Distribution not specified, assuming gaussian ...
gbm3<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.1, n.trees=nt) 
## Distribution not specified, assuming gaussian ...
gbm4<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.2, n.trees=nt) 
## Distribution not specified, assuming gaussian ...
gbm5<- gbm(Y~x1+x2+x3+x4+x5+x6, data=kajal_hw6, cv.folds=3, shrinkage = 0.5, n.trees=nt)
## Distribution not specified, assuming gaussian ...
plot(gbm1$cv.error,ylim=c(0,8.5),col="red",type="l")
points(gbm2$cv.error,col="green",type="l")
points(gbm3$cv.error,col="blue",type="l")
points(gbm4$cv.error,col="purple",type="l")
points(gbm5$cv.error,col="pink",type="l")

# Split the 1000 observations into training and test halves
train <- kajal_hw6[1:500,]
test <- kajal_hw6[501:1000,]


lambda_set <- seq( 1.e-4, 0.3, by=0.001 )


training_set_mse <- rep(NA,length(lambda_set))
test_set_mse <- rep(NA,length(lambda_set))

# For each shrinkage value, fit a 1000-tree boosted model on the training half
# and record the training- and test-set MSE
for( lmi in 1:length(lambda_set) ){
  lm = lambda_set[lmi]
  
  boostkajalset <- gbm( Y ~ ., data=train, distribution="gaussian", n.trees=1000, interaction.depth=4, shrinkage=lm )

  y_hat <- predict(boostkajalset, newdata=train, n.trees=1000 )
  training_set_mse[lmi] <- mean( ( y_hat - train$Y )^2 )
  
  y_hat <- predict(boostkajalset, newdata=test, n.trees=1000 )
  test_set_mse[lmi] <- mean( ( y_hat - test$Y )^2 )
}

# Plot training (red) and test (green) MSE against lambda on a common y-scale
plot( lambda_set, training_set_mse, type='b', pch=19, col='red', xlab='Lambda Value', ylab='MSE',
      ylim=range(c(training_set_mse, test_set_mse)) )
lines( lambda_set, test_set_mse, type='b', pch=19, col='green' )
grid()
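
To pin down the "best" lambda mentioned above instead of reading it off the plot, the minimum of the test-set MSE curve can be extracted directly (a small addition to the code above):

# Shrinkage value with the lowest test-set MSE
lambda_set[ which.min(test_set_mse) ]
min(test_set_mse)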