Example Ozone data
library(ElemStatLearn)
# Load the ozone data set that ships with the ElemStatLearn package.
data("ozone", package = "ElemStatLearn")
# Reorder the rows by increasing ozone value, then show the first few rows.
row_order <- order(ozone[["ozone"]])
ozone <- ozone[row_order, ]
head(ozone)
## ozone radiation temperature wind
## 17 1 8 59 9.7
## 19 4 25 61 9.7
## 14 6 78 57 18.4
## 45 7 48 80 14.3
## 106 7 49 69 10.3
## 7 8 19 61 20.1
# Goal: find the relationship between temperature and ozone.
Bagged Loess
# Bagged loess: fit a loess smoother to 10 bootstrap resamples of the data and
# store each fit's predictions in one row of ll.
# ll has one row per resample (10) and one column per ozone value 1..155.
ll <- matrix(NA, nrow = 10, ncol = 155)
for (i in seq_len(nrow(ll))) {
  # Bootstrap resample: draw row indices of ozone with replacement.
  # TRUE is spelled out because T is an ordinary variable that can be reassigned.
  ss <- sample(seq_len(nrow(ozone)), replace = TRUE)
  ozone0 <- ozone[ss, ]
  ozone0 <- ozone0[order(ozone0$ozone), ]
  # loess fits a local polynomial (non-linear) regression of temperature on
  # ozone; span = 0.2 is the smoothing parameter.
  loess0 <- loess(temperature ~ ozone, data = ozone0, span = 0.2)
  # Predict temperature at ozone = 1, 2, ..., 155 (in order) and keep the
  # result as row i, so the 10 resamples fill the 10 rows of ll.
  ll[i, ] <- predict(loess0, newdata = data.frame(ozone = seq_len(ncol(ll))))
}
## Warning in simpleLoess(y, x, w, span, degree, parametric, drop.square,
## normalize, : pseudoinverse used at 21
## Warning in simpleLoess(y, x, w, span, degree, parametric, drop.square,
## normalize, : neighborhood radius 2
## Warning in simpleLoess(y, x, w, span, degree, parametric, drop.square,
## normalize, : reciprocal condition number 1.7742e-16
# Scatter plot of the observed data: ozone on the x axis, temperature on the y axis.
plot(ozone$ozone, ozone$temperature, pch = 19, cex = 0.5)
# Overlay the loess curve from each of the 10 bootstrap resamples in grey.
# The individual curves appear to overfit somewhat.
ozone_grid <- 1:155
for (fit_row in seq_len(10)) {
  lines(ozone_grid, ll[fit_row, ], col = "grey", lwd = 2)
}
# Overlay the column-wise mean of ll (the average of the 10 curves) in red.
lines(ozone_grid, apply(ll, MARGIN = 2, FUN = mean), col = "red", lwd = 2)

# The red line is the mean of the 10 bootstrap curves, i.e. the bagged loess estimate.
More bagging in caret
The caret package can also be used to do the bagging.
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: sandwich
# Predictor data frame (a single ozone column) and the outcome vector.
predictors <- data.frame(ozone = ozone[["ozone"]])
temperature <- ozone[["temperature"]]
# Bag 10 models; the fit, predict and aggregate functions are taken from
# caret's ctreeBag helper list.
treebag <- bag(
  x = predictors,
  y = temperature,
  B = 10,
  bagControl = bagControl(
    fit = ctreeBag$fit,
    predict = ctreeBag$pred,
    aggregate = ctreeBag$aggregate
  )
)
## Warning: executing %dopar% sequentially: no parallel backend registered
# The bag() function performs bagging for a model of your choice, much like the manual loess bagging above, but you must supply the fit, predict and aggregate functions through bagControl().
# Observed data in light grey: ozone on the x axis, temperature on the y axis.
plot(ozone$ozone, temperature, col = "lightgrey", pch = 19)
# Red: predictions from the first fitted model only (treebag$fits[[1]]$fit),
# evaluated on the ozone predictors.
single_fit_pred <- predict(treebag$fits[[1]]$fit, predictors)
points(ozone$ozone, single_fit_pred, pch = 19, col = "red")
# Blue: predictions from the whole bagged object, which combines all fits.
# NOTE(review): the blue points are expected to follow the data better than
# the single fit shown in red; confirm on the rendered plot.
bagged_pred <- predict(treebag, predictors)
points(ozone$ozone, bagged_pred, pch = 19, col = "blue")
