1.generate a set of training data and a set of testing data

require(MASS)
#training data
set.seed(100)
n<-50
train.x<-matrix(runif(n,-2,2),nrow = n)
train.y<-2+0.75*sin(train.x)-0.75*cos(train.x)+rnorm(n,0,0.2)
#testing data
set.seed(2)
test.x<-matrix(runif(n,-2,2),nrow = n)
test.y<-2+0.75*sin(test.x)-0.75*cos(test.x)+rnorm(n,0,0.2)

Plot

2.fit a linear curve

(2a)
mod1<-lm(train.y~train.x)
require(knitr)
kable(summary(mod1)$coef,type='html',caption = "least square method")
least square method
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.5497215 0.0476145 32.54728 0
train.x 0.6527933 0.0463274 14.09088 0
(2b)
#calculate MSE
M1.train.mse<-sum((train.y-fitted(mod1))^2)/n
M1.test.mse<-sum((test.y-coef(mod1)[1]-coef(mod1)[2]*test.x)^2)/n

MSE for training data is

## [1] 0.1057217

MSE for testing data is

## [1] 0.165541

3.fit a polynomial curve

(3a)
mod2<-lm(train.y~train.x+I(train.x^2)+I(train.x^3))
kable(summary(mod2)$coef,type='html',caption = "least square method")
least square method
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.2803996 0.0458186 27.944957 0.0000000
train.x 0.6984202 0.0688970 10.137165 0.0000000
I(train.x^2) 0.2811826 0.0350294 8.027055 0.0000000
I(train.x^3) -0.0692595 0.0329634 -2.101099 0.0411407
(3b)
#calculate MSE
M2.train.mse<-sum((train.y-fitted(mod2))^2)/n
mat.x<-cbind(1,test.x,test.x^2,test.x^3)
#calculate square error by testing data
res<-(test.y-mat.x%*%as.matrix(coef(mod2)))^2
#MSE for testing data
M2.test.mse<-sum(res)/n

MSE for training data is

## [1] 0.04379279

MSE for testing data is

## [1] 0.05448764

4.fit a spline curve(truncated power basis)

#create design matrix
mat.tpb<-function(x,nknots=12){
    n<-length(x)
    knots<-seq(min(x)+0.2,max(x)-0.2,len=nknots)
    zb<-cbind(1,x,x^2,x^3)
    zt<-matrix(NA,nrow=n,ncol=nknots)
    for(i in 1:nknots){
      zt[,i]<-ifelse(x>knots[i],x^3,0)
    }
    cbind(zb,zt)
}
z<-mat.tpb(train.x,12)

Design matrix is not full rank, so the OLS method can’t work. I use ginv to get generalized inverse of the design matrix. Remember that the OLS method, we can estimate \(\mathbf{\hat{\beta}}\) as \((\mathbf{X}^\intercal \mathbf{X})^{-1}\mathbf{X}^\intercal\mathbf{Y}\).

#use generalized inverse to calculate the estimates of beta hat.
mod3<-ginv(t(z)%*%z)%*%t(z)%*%train.y
(4a)
##        estimates
## 1    1.387972364
## 2    1.236147601
## 3    0.113799272
## 4   -0.292244063
## 5   -0.158247803
## 6   -0.281820827
## 7    0.226614704
## 8   -1.156615237
## 9   11.744308125
## 10 -18.341635885
## 11   5.059879059
## 12   2.109346517
## 13   0.597092379
## 14   0.256863839
## 15  -0.005830953
## 16   0.127789820
(4b)
M3.train.mse<-sum((train.y-z%*%as.matrix(mod3))^2)/n
mod3.mat.x<-mat.tpb(test.x,12)
#calculate square error by testing data
mod3.res<-(test.y-mod3.mat.x%*%as.matrix(mod3))^2 
#MSE for testing data
M3.test.mse<-sum(mod3.res)/n 

MSE for training data is

## [1] 0.02064758

MSE for testing data is

## [1] 0.169067
(5a) reproduce two plots based on simulated data and the results.

(5b)

6.Bias-Variance decomposition

First,to calculate \(\mathbf{E}(\hat{\mathbf{f}}(x_0))\),we need to generate 1000 set of independent data.

new.x0<-matrix(runif(1000),nrow=1000)
M1.exp.x0<-sum(coef(mod1)[1]+coef(mod1)[2]*new.x0)/1000
mat.x0<-cbind(1,new.x0,new.x0^2,new.x0^3)
M2.exp.x0<-sum(mat.x0%*%as.matrix(coef(mod2)))/1000
mod3.mat.x0<-mat.tpb(new.x0,12)
M3.exp.x0<-sum(mod3.mat.x0%*%as.matrix(mod3))/1000 

Calculate Bias terms:

mod<-function(x){
  2+0.75*sin(x)-0.75*cos(x)
}
#calculate Bias^2
exp.x0<-matrix(rep(c(M1.exp.x0,M2.exp.x0,M3.exp.x0),50),nrow = 50,byrow = T)
bias<-apply((cbind(mod(test.x),mod(test.x),mod(test.x))-exp.x0)^2,MARGIN = 2,FUN = mean)
print(bias)
## [1] 0.5657276 0.5338804 0.5925484

Calculate Variance terms:

fit.val<-cbind(coef(mod1)[1]+coef(mod1)[2]*test.x,mat.x%*%as.matrix(coef(mod2)),mod3.mat.x%*%as.matrix(mod3))
variance<-apply((fit.val-exp.x0)^2,MARGIN = 2,FUN = mean)
variance<-unname(variance)
print(variance)
## [1] 0.7208346 0.6147095 0.7341239
## integer(0)