# Observations (dataset 1)
load('data1/raw_curves.rdata')
# combine dat.pars into a matrix
pars.matrix <- matrix(NA,nrow = 280,ncol = 4) # 40 runs x 7 parameter sets
for(i in 1:40)
{
for(j in 1:7)
{
# row 7*(i-1)+j holds the i-th row of the j-th element of dat.pars
pars.matrix[7*(i-1)+j,] <- as.matrix(dat.pars[[j]][i,])
}
}
# drop the first parameter set of each of the 40 runs, leaving 240 observations
fd.input <- pars.matrix[-(7*(0:39)+1),]
fd.input <- scale(fd.input)
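# quick sanity check (a sketch, not in the original run): the dropped rows
# are the first parameter set of each run
head(7*(0:39) + 1) # 1 8 15 22 29 36
stopifnot(nrow(fd.input) == 240)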
amp.change <- t(mat.amp.change) # transpose so each row is a curve
phase.change <- t(mat.phase.change) # transpose so each row is a curve
rm(dat.pars)
rm(mat.amp)
rm(mat.phase)
# Input
colnames(fd.input) <- c('lx','ly','h','phi')
str(fd.input)
## num [1:240, 1:4] -0.297 -0.297 -0.297 -0.297 -0.297 ...
## - attr(*, "scaled:center")= num [1:4] 0.175 0.25 0.4 52.5
## - attr(*, "scaled:scale")= num [1:4] 0.0742 0.0296 0.0593 25.6709
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "lx" "ly" "h" "phi"
# regard amplitude change as output
output <- amp.change
str(output) # Each row is a curve
## num [1:240, 1:1001] 0.00808 0.02721 0.06677 0.09854 0.12138 ...
# covariance matrix
output_cov <- cov(output)
# eigen-decomposition
output.eigen <- eigen(output_cov)
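# optional check (a sketch, not in the original run): with only 240 curves,
# at most 240 eigenvalues are numerically nonzero, so keeping 240 components
# loses essentially nothing
cumsum(output.eigen$values[1:240])/sum(output.eigen$values)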
# PC scores (Train and test output)
# Each row is a vector of PC scores for one observation
output_pc_scores <- output %*%
output.eigen$vectors[,1:240]
# use the leading 240 eigenvectors as a basis to map the PC scores back to the original dimension (240 --> 1001)
recovered_output <- output.eigen$vectors[,1:240] %*% t(output_pc_scores)
str(recovered_output) # Each column is a curve
## num [1:1001, 1:240] 0.00806 0.00814 0.00821 0.00826 0.00831 ...
# mean squared reconstruction error
recovered_error <- mean(apply(t(recovered_output)-output,1,
function(x) sum(x^2)/1001))
recovered_error
## [1] 5.995411e-11
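The near-zero error is expected: writing the curves as the rows of $F$ and the leading $240$ eigenvectors as the columns of $V \in \mathbb{R}^{1001\times 240}$, the recovery step computes

\[ \begin{gather} \hat{F}^\top = VV^\top F^\top \end{gather} \]

and $VV^\top$ is the orthogonal projection onto the span of those eigenvectors; as the error of about $6\times 10^{-11}$ confirms, the curves lie (numerically) in that span, so the projection returns them essentially unchanged.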
# display a few original and recovered curves
par(mfrow = c(1,2))
plot(output[1,],ylab = 'amplitude',main = 'original curve 1')
plot(recovered_output[,1],ylab = 'amplitude',main = 'recovered curve 1')
plot(output[5,],ylab = 'amplitude',main = 'original curve 5')
plot(recovered_output[,5],ylab = 'amplitude',main = 'recovered curve 5')
library(groupdata2) # package to split data into k folds
library(tidyverse)
# divide data into 5 folds
set.seed(1)
folds <- fold(data.frame(fd.input),5,method = 'n_dist')$.folds
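# quick check (a sketch, not in the original run): 240 observations split
# evenly into 5 folds of 48
table(folds)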
# these vectors store the train and test errors for each fold of the 5-fold cross-validation
train_MSE <- rep(0,5) # train mean square error for each fold
test_MSE <- rep(0,5) # test mean square error for each fold
train_MAE <- rep(0,5) # train mean absolute error for each fold
test_MAE <- rep(0,5) # test mean absolute error for each fold
train_RMSE <- rep(0,5) # train root mean square error for each fold
test_RMSE <- rep(0,5) # test root mean square error for each fold
train_SE <- rep(0,5) # train sum of squared residuals for each fold (see SE formula below)
test_SE <- rep(0,5) # test sum of squared residuals for each fold
# 5-fold cross-validation
for(i in 1:5)
{
train_set <- as_tibble(cbind(fd.input[folds != i,],output_pc_scores[folds != i,]),
                       .name_repair = 'minimal') # silence the non-unique-names warning
colnames(train_set)[1:244] <- c('lx','ly','h','phi',rep('coor',240))
# 240 linear models used to predict 240 PC scores
# the first 4 columns are input variables
my_lms <- lapply(5:244,function(x)
lm(coor ~ lx+ly+h+phi,data = train_set[,c(1:4,x)]))
# rows: number of training observations in this fold
# columns: the 240 PC scores
train_num <- sum(folds!=i)
test_num <- sum(folds ==i)
# fitted PC scores
fit_coor <- matrix(NA,nrow = train_num,ncol = 240)
# predicted PC scores
predict_coor <- matrix(NA,nrow = test_num,ncol = 240)
for(j in 1:240)
{
fit_coor[,j] <- my_lms[[j]]$fitted.values
predict_coor[,j] <- predict(my_lms[[j]],data.frame(lx = fd.input[folds ==i,1],
ly = fd.input[folds == i,2],
h = fd.input[folds == i,3],
phi = fd.input[folds == i,4]))
}
fit_curve <- output.eigen$vectors[,1:240] %*%
t(fit_coor)
train_MSE[i] <- mean(apply(t(fit_curve)-output[folds != i,],
1,function(x) sum(x^2)/1001))
train_MAE[i] <- mean(abs(t(fit_curve)-output[folds !=i,]))
predict_curve <- output.eigen$vectors[,1:240] %*%
t(predict_coor)
test_MSE[i] <- mean(apply(t(predict_curve)-output[folds == i,],
1,function(x) sum(x^2)/1001))
test_MAE[i] <- mean(abs(t(predict_curve)-output[folds ==i,]))
# Root mean square error
train_RMSE[i] <- sqrt(train_MSE[i])
test_RMSE[i] <- sqrt(test_MSE[i])
# sum of squared residuals (SE)
train_SE[i] <- sum(folds !=i)*train_MSE[i]
test_SE[i] <- sum(folds == i)*test_MSE[i]
}
par(mfrow = c(2,2))
plot(fit_curve[,1],ylab = 'amplitude',main = 'fitted train curve 1',ylim = c(-0.15,0.15))
plot(output[which(folds!=i)[1],],ylab = 'amplitude',main = 'original train curve 1',ylim = c(-0.15,0.15))
plot(predict_curve[,1],ylab = 'amplitude',main = 'predicted test curve 1',ylim = c(-0.5,0.7))
plot(output[which(folds==i)[1],],ylab = 'amplitude',main = 'original test curve 1',ylim = c(-0.5,0.7))
Here $f_{ij}$ is the $j$-th evaluation of the $i$-th curve, $\hat{f}_{ij}$ its fitted or predicted value, $n$ the number of curves in the fold, and $p = 1001$ the number of evaluation points:

\[ \begin{gather} MSE = \frac{1}{np}\sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f}_{ij})^2 \end{gather} \]
\[ \begin{gather} RMSE = \sqrt{\frac{1}{np}\sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f}_{ij})^2} \end{gather} \]
\[ \begin{gather} SE = \frac{1}{p}\sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f}_{ij})^2 \end{gather} \]
\[ \begin{gather} MAE = \frac{1}{np}\sum_{i=1}^n\sum_{j=1}^{p}|f_{ij}-\hat{f}_{ij}| \end{gather} \]

The $v1$ variants omit the averaging over the $p$ evaluation points, so each equals its counterpart above multiplied by $p = 1001$ ($\sqrt{1001}$ for the RMSE), which is how they are computed below:

\[ \begin{gather} MSE_{v1} = \frac{1}{n}\sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f}_{ij})^2 \end{gather} \]
\[ \begin{gather} RMSE_{v1} = \sqrt{\frac{1}{n}\sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f}_{ij})^2} \end{gather} \]
\[ \begin{gather} SE_{v1} = \sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f}_{ij})^2 \end{gather} \]
\[ \begin{gather} MAE_{v1} = \frac{1}{n}\sum_{i=1}^n\sum_{j=1}^{p}|f_{ij}-\hat{f}_{ij}| \end{gather} \]
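As a cross-check (a hypothetical helper, not part of the original run), the apply() construction used for the MSE above is just the mean of the squared residual matrix, since every row has the same length $p = 1001$:

mse_curve <- function(f, fhat) mean((f - fhat)^2)
# identical to mean(apply(f - fhat, 1, function(x) sum(x^2)/1001)):
# the mean of row means equals the overall mean when all rows have equal length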
tibble(train_MSE,test_MSE,train_RMSE,test_RMSE,train_SE,test_SE,train_MAE,test_MAE)
## # A tibble: 5 × 8
## train_MSE test_MSE train_RMSE test_RMSE train_SE test_SE train_MAE test_MAE
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.0180 0.0168 0.134 0.129 3.45 0.804 0.0970 0.0946
## 2 0.0177 0.0181 0.133 0.135 3.39 0.870 0.0963 0.0956
## 3 0.0168 0.0218 0.129 0.148 3.22 1.05 0.0935 0.104
## 4 0.0182 0.0158 0.135 0.126 3.50 0.758 0.0974 0.0917
## 5 0.0171 0.0205 0.131 0.143 3.29 0.984 0.0939 0.105
tibble(train_MSE_v1 = 1001*train_MSE,test_MSE_v1 = 1001*test_MSE,
train_RMSE_v1 = sqrt(1001)*train_RMSE,
test_RMSE_v1 = sqrt(1001)*test_RMSE,
train_SE_v1 = 1001*train_SE,
test_SE_v1 = 1001*test_SE,
train_MAE_v1 = 1001*train_MAE,
test_MAE_v1 = 1001*test_MAE) %>% print(n = Inf)
## # A tibble: 5 × 8
## train_MSE_v1 test_MSE_v1 train_RMSE_v1 test_RMSE_v1 train_SE_v1 test_SE_v1
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 18.0 16.8 4.24 4.09 3453. 805.
## 2 17.7 18.1 4.21 4.26 3398. 871.
## 3 16.8 21.8 4.10 4.67 3222. 1047.
## 4 18.2 15.8 4.27 3.98 3499. 759.
## 5 17.2 20.5 4.14 4.53 3294. 985.
## train_MAE_v1 test_MAE_v1
## <dbl> <dbl>
## 1 97.1 94.7
## 2 96.4 95.7
## 3 93.6 104.
## 4 97.5 91.8
## 5 94.0 105.
In this part we use different network architectures to explore the performance of neural networks in predicting functional data.
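The grid below varies the hidden-layer width (unit), the activation (act), and the optimizer. The model-building code is not shown; the following is a minimal sketch of one grid cell, assuming two hidden layers of unit units each mapping the 4 inputs to the 240 PC scores (consistent with the nPara column, e.g. (4*32+32)+(32*32+32)+(32*240+240) = 9136 for unit = 32); the loss, epochs, and batch size are assumptions:

library(keras)
# hypothetical reconstruction of one grid cell; only the layer sizes are
# implied by the parameter counts -- everything else is an assumption
build_model <- function(unit, act, optimizer) {
  model <- keras_model_sequential() %>%
    layer_dense(units = unit, activation = act, input_shape = 4) %>%
    layer_dense(units = unit, activation = act) %>%
    layer_dense(units = 240) # one linear output per PC score
  model %>% compile(loss = 'mse', optimizer = optimizer)
  model
}
# e.g. build_model(512, 'relu', optimizer_adam()) has
# (4*512+512) + (512*512+512) + (512*240+240) = 388336 parameters,
# matching nPara for unit = 512 in the first table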
## # A tibble: 40 × 12
## unit act optimizer trainRMSE_v1 testRMSE_v1 trainMAE_v1
## <dbl> <fct> <chr> <dbl> <dbl> <dbl>
## 1 32 relu optimizer_adam() 1.97 2.33 38.3
## 2 64 relu optimizer_adam() 1.65 2.09 29.7
## 3 128 relu optimizer_adam() 1.54 1.91 23.4
## 4 256 relu optimizer_adam() 1.47 1.80 19.4
## 5 512 relu optimizer_adam() 1.43 1.77 16.9
## 6 32 exponential optimizer_adam() 1.99 2.29 38.8
## 7 64 exponential optimizer_adam() 1.80 2.18 33.6
## 8 128 exponential optimizer_adam() 1.64 1.99 28.4
## 9 256 exponential optimizer_adam() 1.47 1.84 23.9
## 10 512 exponential optimizer_adam() 1.45 1.83 21.6
## 11 32 relu optimizer_adamax() 2.04 2.33 40.4
## 12 64 relu optimizer_adamax() 1.76 2.13 32.9
## 13 128 relu optimizer_adamax() 1.55 1.94 25.5
## 14 256 relu optimizer_adamax() 1.47 1.83 20.3
## 15 512 relu optimizer_adamax() 1.43 1.80 17.8
## 16 32 exponential optimizer_adamax() 2.15 2.43 42.8
## 17 64 exponential optimizer_adamax() 1.95 2.25 38.0
## 18 128 exponential optimizer_adamax() 1.77 2.13 33.2
## 19 256 exponential optimizer_adamax() 1.58 1.94 27.8
## 20 512 exponential optimizer_adamax() 1.51 1.92 24.8
## 21 32 relu optimizer_nadam() 1.75 2.09 33.1
## 22 64 relu optimizer_nadam() 1.53 1.89 25.8
## 23 128 relu optimizer_nadam() 1.45 1.80 21.4
## 24 256 relu optimizer_nadam() 1.42 1.80 19.8
## 25 512 relu optimizer_nadam() 1.38 1.72 17.7
## 26 32 exponential optimizer_nadam() 1.87 2.24 34.2
## 27 64 exponential optimizer_nadam() 1.66 1.98 29.3
## 28 128 exponential optimizer_nadam() 1.53 1.87 25.8
## 29 256 exponential optimizer_nadam() 1.43 1.78 23.0
## 30 512 exponential optimizer_nadam() 1.45 1.80 23.0
## 31 32 relu optimizer_rmsprop() 1.96 2.27 39.0
## 32 64 relu optimizer_rmsprop() 1.71 2.06 31.5
## 33 128 relu optimizer_rmsprop() 1.57 1.93 26.7
## 34 256 relu optimizer_rmsprop() 1.51 1.86 24.1
## 35 512 relu optimizer_rmsprop() 1.47 1.81 23.0
## 36 32 exponential optimizer_rmsprop() 1.96 2.26 38.0
## 37 64 exponential optimizer_rmsprop() 1.80 2.10 34.6
## 38 128 exponential optimizer_rmsprop() 1.60 1.95 28.8
## 39 256 exponential optimizer_rmsprop() 1.50 1.80 24.4
## 40 512 exponential optimizer_rmsprop() 1.51 1.88 22.4
## testMAE_v1 nPara smoothed_trainRMSE_v1 smoothed_testRMSE_v1
## <dbl> <int> <dbl> <dbl>
## 1 47.1 9136 1.97 2.33
## 2 41.4 20080 1.65 2.08
## 3 36.7 48112 1.52 1.88
## 4 33.9 128752 1.45 1.76
## 5 32.1 388336 1.40 1.73
## 6 45.6 9136 1.97 2.27
## 7 42.5 20080 1.76 2.15
## 8 38.5 48112 1.60 1.95
## 9 35.0 128752 1.43 1.80
## 10 33.8 388336 1.41 1.79
## 11 47.8 9136 2.04 2.33
## 12 42.4 20080 1.76 2.11
## 13 37.9 48112 1.53 1.91
## 14 34.6 128752 1.45 1.80
## 15 33.2 388336 1.40 1.76
## 16 48.9 9136 2.13 2.40
## 17 45.0 20080 1.92 2.22
## 18 41.9 48112 1.73 2.08
## 19 37.8 128752 1.54 1.89
## 20 36.7 388336 1.47 1.88
## 21 43.2 9136 1.75 2.08
## 22 37.3 20080 1.52 1.87
## 23 34.5 48112 1.44 1.78
## 24 34.1 128752 1.40 1.78
## 25 32.1 388336 1.37 1.69
## 26 43.9 9136 1.85 2.22
## 27 38.8 20080 1.64 1.95
## 28 35.3 48112 1.49 1.82
## 29 33.6 128752 1.39 1.74
## 30 34.1 388336 1.41 1.76
## 31 46.6 9136 1.95 2.26
## 32 41.2 20080 1.68 2.03
## 33 38.0 48112 1.53 1.88
## 34 36.7 128752 1.47 1.81
## 35 35.1 388336 1.44 1.77
## 36 45.4 9136 1.94 2.23
## 37 42.3 20080 1.75 2.04
## 38 38.0 48112 1.53 1.89
## 39 34.6 128752 1.44 1.74
## 40 37.0 388336 1.49 1.86
## smoothed_testMAE_v1 smoothed_trainMAE_v1
## <dbl> <dbl>
## 1 47.0 38.3
## 2 41.2 29.5
## 3 36.2 23.0
## 4 33.2 18.9
## 5 31.2 16.2
## 6 45.2 38.4
## 7 41.8 32.9
## 8 37.5 27.5
## 9 34.1 23.0
## 10 32.8 20.4
## 11 47.7 40.4
## 12 42.1 32.6
## 13 37.3 25.1
## 14 33.7 19.6
## 15 32.3 17.1
## 16 48.4 42.4
## 17 44.3 37.4
## 18 41.1 32.3
## 19 36.8 26.8
## 20 35.8 23.7
## 21 43.0 32.9
## 22 36.9 25.4
## 23 34.0 20.9
## 24 33.5 19.3
## 25 31.6 17.4
## 26 43.5 33.8
## 27 38.2 28.7
## 28 34.3 24.6
## 29 32.8 21.7
## 30 33.2 21.8
## 31 46.3 38.7
## 32 40.6 30.8
## 33 37.0 25.5
## 34 35.7 23.0
## 35 34.2 21.8
## 36 44.9 37.5
## 37 41.3 33.5
## 38 36.7 27.3
## 39 33.4 22.8
## 40 36.4 21.8
## # A tibble: 40 × 12
## unit act optimizer trainRMSE_v1 testRMSE_v1 trainMAE_v1
## <dbl> <fct> <chr> <dbl> <dbl> <dbl>
## 1 32 relu optimizer_adam() 1.78 2.13 34.8
## 2 64 relu optimizer_adam() 1.55 1.89 27.8
## 3 128 relu optimizer_adam() 1.45 1.85 23.8
## 4 256 relu optimizer_adam() 1.46 1.86 23.0
## 5 512 relu optimizer_adam() 1.48 1.83 23.9
## 6 32 exponential optimizer_adam() 2.08 2.39 43.2
## 7 64 exponential optimizer_adam() 1.90 2.32 38.3
## 8 128 exponential optimizer_adam() 1.75 2.22 34.0
## 9 256 exponential optimizer_adam() 1.77 2.25 34.6
## 10 512 exponential optimizer_adam() 1.76 2.27 34.3
## 11 32 relu optimizer_adamax() 1.89 2.19 37.7
## 12 64 relu optimizer_adamax() 1.61 2.00 29.8
## 13 128 relu optimizer_adamax() 1.51 1.90 25.3
## 14 256 relu optimizer_adamax() 1.46 1.84 23.4
## 15 512 relu optimizer_adamax() 1.51 1.88 24.6
## 16 32 exponential optimizer_adamax() 2.39 2.73 50.7
## 17 64 exponential optimizer_adamax() 2.20 2.54 45.7
## 18 128 exponential optimizer_adamax() 2.04 2.49 41.3
## 19 256 exponential optimizer_adamax() 1.91 2.44 37.7
## 20 512 exponential optimizer_adamax() 1.83 2.38 35.5
## 21 32 relu optimizer_nadam() 1.68 2.05 32.1
## 22 64 relu optimizer_nadam() 1.48 1.85 25.9
## 23 128 relu optimizer_nadam() 1.43 1.79 23.5
## 24 256 relu optimizer_nadam() 1.52 1.83 25.6
## 25 512 relu optimizer_nadam() 1.53 1.87 25.7
## 26 32 exponential optimizer_nadam() 1.91 2.37 38.6
## 27 64 exponential optimizer_nadam() 1.80 2.20 35.6
## 28 128 exponential optimizer_nadam() 1.77 2.27 35.2
## 29 256 exponential optimizer_nadam() 1.74 2.10 34.4
## 30 512 exponential optimizer_nadam() 1.80 2.12 35.4
## 31 32 relu optimizer_rmsprop() 1.86 2.19 37.0
## 32 64 relu optimizer_rmsprop() 1.55 1.96 28.9
## 33 128 relu optimizer_rmsprop() 1.54 1.89 27.2
## 34 256 relu optimizer_rmsprop() 1.50 1.90 25.8
## 35 512 relu optimizer_rmsprop() 1.67 1.97 30.3
## 36 32 exponential optimizer_rmsprop() 1.88 2.27 38.7
## 37 64 exponential optimizer_rmsprop() 1.84 2.17 36.5
## 38 128 exponential optimizer_rmsprop() 1.81 2.20 36.3
## 39 256 exponential optimizer_rmsprop() 1.76 2.10 34.8
## 40 512 exponential optimizer_rmsprop() 1.81 2.15 33.5
## testMAE_v1 nPara smoothed_trainRMSE_v1 smoothed_testRMSE_v1
## <dbl> <int> <dbl> <dbl>
## 1 44.3 9264 1.78 2.13
## 2 38.0 20336 1.54 1.88
## 3 35.9 48624 1.44 1.82
## 4 36.4 129776 1.43 1.82
## 5 36.0 390384 1.45 1.77
## 6 50.8 9264 2.06 2.37
## 7 48.8 20336 1.86 2.27
## 8 46.4 48624 1.67 2.13
## 9 47.6 129776 1.65 2.11
## 10 48.2 390384 1.61 2.10
## 11 45.9 9264 1.89 2.19
## 12 40.6 20336 1.60 1.98
## 13 37.9 48624 1.48 1.86
## 14 36.0 129776 1.41 1.77
## 15 37.6 390384 1.44 1.80
## 16 58.3 9264 2.34 2.67
## 17 53.5 20336 2.12 2.44
## 18 52.4 48624 1.92 2.35
## 19 51.6 129776 1.74 2.23
## 20 50.4 390384 1.65 2.15
## 21 42.1 9264 1.68 2.04
## 22 36.8 20336 1.47 1.84
## 23 35.1 48624 1.42 1.77
## 24 36.9 129776 1.50 1.80
## 25 37.2 390384 1.50 1.83
## 26 49.5 9264 1.90 2.35
## 27 46.2 20336 1.75 2.15
## 28 47.3 48624 1.68 2.18
## 29 44.5 129776 1.67 2.03
## 30 44.6 390384 1.74 2.05
## 31 45.1 9264 1.85 2.18
## 32 39.8 20336 1.53 1.93
## 33 37.9 48624 1.51 1.85
## 34 38.4 129776 1.46 1.86
## 35 40.1 390384 1.62 1.91
## 36 47.5 9264 1.84 2.23
## 37 44.9 20336 1.77 2.11
## 38 46.0 48624 1.68 2.07
## 39 44.0 129776 1.69 2.03
## 40 44.5 390384 1.75 2.09
## smoothed_testMAE_v1 smoothed_trainMAE_v1
## <dbl> <dbl>
## 1 44.3 34.8
## 2 37.8 27.6
## 3 35.4 23.4
## 4 35.5 22.1
## 5 34.7 22.7
## 6 50.3 42.8
## 7 47.7 37.3
## 8 44.4 32.3
## 9 44.7 32.1
## 10 44.7 30.9
## 11 45.9 37.7
## 12 40.3 29.5
## 13 37.0 24.5
## 14 34.4 22.1
## 15 35.6 22.7
## 16 57.0 49.5
## 17 51.3 43.9
## 18 49.5 38.8
## 19 47.4 34.4
## 20 45.6 31.5
## 21 41.9 31.9
## 22 36.6 25.6
## 23 34.7 23.0
## 24 36.3 24.9
## 25 36.3 24.9
## 26 49.1 38.3
## 27 45.1 34.5
## 28 45.3 33.1
## 29 42.9 32.7
## 30 43.2 34.0
## 31 44.8 36.7
## 32 39.3 28.2
## 33 37.0 26.1
## 34 37.5 24.5
## 35 38.9 28.8
## 36 46.7 37.8
## 37 43.4 34.7
## 38 43.5 33.3
## 39 42.4 32.8
## 40 43.1 31.8
# Observations (dataset 2)
fd.input <- readRDS('data2/fd_input.rds') # matrix
fd.output <- readRDS('data2/fd_output.rds') # data.frame
ref_amp <- readRDS('data2/ref_amp.rds') # matrix
trans_amp <- readRDS('data2/trans_amp.rds') # matrix
str(fd.input)
## num [1:500, 1:4] 0.891 0.705 0.779 0.207 0.097 0.018 0.182 0.478 0.629 0.408 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:4] "a" "b" "tBCB" "r_MDA"
dim(ref_amp) # 500 curves, each row is a curve
## [1] 500 1001
dim(trans_amp) # 500 curves, each row is a curve
## [1] 500 1001
output <- ref_amp # choose reflection amplitude curves as output
# covariance matrix
output_cov <- cov(output)
# eigen-decomposition
output.eigen <- eigen(output_cov)
# PC scores (Train and test output)
# Each row is a vector of PC scores for one observation
output_pc_scores <- output %*%
output.eigen$vectors[,1:500]
# use the leading 500 eigenvectors as a basis to map the PC scores back to the original dimension (500 --> 1001)
recovered_output <- output.eigen$vectors[,1:500] %*% t(output_pc_scores)
str(recovered_output) # Each column is a curve
## num [1:1001, 1:500] 0.958 0.958 0.958 0.958 0.958 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:500] "Abs.s11.1.." "Abs.s11.2.." "Abs.s11.3.." "Abs.s11.4.." ...
# mean squared reconstruction error
recovered_error <- mean(apply(t(recovered_output)-output,1,
function(x) sum(x^2)/1001))
recovered_error
## [1] 1.337268e-09
# display a few original and recovered curves
par(mfrow = c(1,2))
plot(output[1,],ylab = 'reflection amplitude',main = 'original curve 1',ylim = c(0,1))
plot(recovered_output[,1],ylab = 'reflection amplitude',main = 'recovered curve 1',ylim = c(0,1))
plot(output[4,],ylab = 'reflection amplitude',main = 'original curve 4',ylim = c(0,1))
plot(recovered_output[,4],ylab = 'reflection amplitude',main = 'recovered curve 4',ylim = c(0,1))
library(groupdata2) # package to split data into k folds
library(tidyverse)
# divide data into 5 folds
set.seed(1)
folds <- fold(data.frame(fd.input),5,method = 'n_dist')$.folds
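# quick check (a sketch, not in the original run): 500 observations split
# evenly into 5 folds of 100
table(folds)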
# these vectors store the train and test errors for each fold of the 5-fold cross-validation
train_MSE <- rep(0,5) # train mean square error for each fold
test_MSE <- rep(0,5) # test mean square error for each fold
train_MAE <- rep(0,5) # train mean absolute error for each fold
test_MAE <- rep(0,5) # test mean absolute error for each fold
train_RMSE <- rep(0,5) # train root mean square error for each fold
test_RMSE <- rep(0,5) # test root mean square error for each fold
train_SE <- rep(0,5) # train sum of squared residuals for each fold (see SE formula above)
test_SE <- rep(0,5) # test sum of squared residuals for each fold
# 5-fold cross-validation
for(i in 1:5)
{
# the first 4 variables are 'a','b','tBCB','r_MDA'
train_set <- as_tibble(cbind(fd.input[folds != i,],output_pc_scores[folds != i,]),
                       .name_repair = 'minimal') # silence the non-unique-names warning
colnames(train_set)[5:504] <- rep('coor',500)
# 500 linear models used to predict 500 PC scores
# the first 4 columns are input variables
my_lms <- lapply(5:504,function(x)
lm(coor ~ a+b+tBCB+r_MDA,data = train_set[,c(1:4,x)]))
# rows: number of training observations in this fold
# columns: the 500 PC scores
train_num <- sum(folds!=i)
test_num <- sum(folds ==i)
# fitted PC scores
fit_coor <- matrix(NA,nrow = train_num,ncol = 500)
# predicted PC scores
predict_coor <- matrix(NA,nrow = test_num,ncol = 500)
for(j in 1:500)
{
fit_coor[,j] <- my_lms[[j]]$fitted.values
predict_coor[,j] <- predict(my_lms[[j]],data.frame(a = fd.input[folds ==i,1],
b = fd.input[folds == i,2],
tBCB = fd.input[folds == i,3],
r_MDA = fd.input[folds == i,4]))
}
fit_curve <- output.eigen$vectors[,1:500] %*%
t(fit_coor)
train_MSE[i] <- mean(apply(t(fit_curve)-output[folds != i,],
1,function(x) sum(x^2)/1001))
train_MAE[i] <- mean(abs(t(fit_curve)-output[folds !=i,]))
predict_curve <- output.eigen$vectors[,1:500] %*%
t(predict_coor)
test_MSE[i] <- mean(apply(t(predict_curve)-output[folds == i,],
1,function(x) sum(x^2)/1001))
test_MAE[i] <- mean(abs(t(predict_curve)-output[folds ==i,]))
# Root mean square error
train_RMSE[i] <- sqrt(train_MSE[i])
test_RMSE[i] <- sqrt(test_MSE[i])
# sum of squared residuals (SE)
train_SE[i] <- sum(folds !=i)*train_MSE[i]
test_SE[i] <- sum(folds == i)*test_MSE[i]
}
par(mfrow = c(2,2))
plot(fit_curve[,1],ylab = 'reflection amplitude',main = 'fitted train curve 1',ylim = c(0,1))
plot(output[which(folds!=i)[1],],ylab = 'reflection amplitude',main = 'original train curve 1',ylim = c(0,1))
plot(predict_curve[,1],ylab = 'reflection amplitude',main = 'predicted test curve 1',ylim = c(0,1))
plot(output[which(folds==i)[1],],ylab = 'reflection amplitude',main = 'original test curve 1',ylim = c(0,1))
tibble(train_MSE,test_MSE,train_RMSE,test_RMSE,train_SE,test_SE,train_MAE,test_MAE)
## # A tibble: 5 × 8
## train_MSE test_MSE train_RMSE test_RMSE train_SE test_SE train_MAE test_MAE
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.00417 0.00446 0.0646 0.0668 1.67 0.446 0.0403 0.0443
## 2 0.00435 0.00373 0.0660 0.0611 1.74 0.373 0.0416 0.0388
## 3 0.00424 0.00425 0.0651 0.0652 1.70 0.425 0.0408 0.0417
## 4 0.00416 0.00445 0.0645 0.0667 1.66 0.445 0.0408 0.0408
## 5 0.00407 0.00486 0.0638 0.0697 1.63 0.486 0.0405 0.0418
tibble(train_MSE_v1 = 1001*train_MSE,test_MSE_v1 = 1001*test_MSE,
train_RMSE_v1 = sqrt(1001)*train_RMSE,
test_RMSE_v1 = sqrt(1001)*test_RMSE,
train_SE_v1 = 1001*train_SE,
test_SE_v1 = 1001*test_SE,
train_MAE_v1 = 1001*train_MAE,
test_MAE_v1 = 1001*test_MAE)
## # A tibble: 5 × 8
## train_MSE_v1 test_MSE_v1 train_RMSE_v1 test_RMSE_v1 train_SE_v1 test_SE_v1
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4.17 4.46 2.04 2.11 1670. 446.
## 2 4.36 3.74 2.09 1.93 1742. 374.
## 3 4.24 4.26 2.06 2.06 1697. 426.
## 4 4.17 4.45 2.04 2.11 1666. 445.
## 5 4.07 4.87 2.02 2.21 1628. 487.
## train_MAE_v1 test_MAE_v1
## <dbl> <dbl>
## 1 40.3 44.3
## 2 41.6 38.9
## 3 40.8 41.8
## 4 40.9 40.9
## 5 40.6 41.8