1. Data 1

1.1 Read data1 input and output

# Observations
load('data1/raw_curves.rdata')

# combine the data.pars into a matrix
pars.matrix <- matrix(NA,nrow = 280,ncol = 4)

for(i in 1:40)
{
  for(j in 1:7)
  {
    pars.matrix[7*(i-1)+j,] <- as.matrix(dat.pars[[j]][i,])
  }
}

# input matrix includes 240 observations
fd.input <- pars.matrix[-(7*(0:39)+1),]
fd.input <- scale(fd.input)

amp.change <- t(mat.amp.change)
phase.change <- t(mat.phase.change)
rm(dat.pars)
rm(mat.amp)
rm(mat.phase)


# Input
colnames(fd.input) <- c('lx','ly','h','phi')
str(fd.input)
##  num [1:240, 1:4] -0.297 -0.297 -0.297 -0.297 -0.297 ...
##  - attr(*, "scaled:center")= num [1:4] 0.175 0.25 0.4 52.5
##  - attr(*, "scaled:scale")= num [1:4] 0.0742 0.0296 0.0593 25.6709
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:4] "lx" "ly" "h" "phi"
# regard amplitude change as output
output <- amp.change
str(output) # Each row is a curve
##  num [1:240, 1:1001] 0.00808 0.02721 0.06677 0.09854 0.12138 ...

1.2 FPCA for data1 (Use eigen-decomposition procedure in doing FPCA)

# covariance matrix
output_cov <- cov(output)
# eigen-decomposition
output.eigen <- eigen(output_cov)

# PC scores (Train and test output)
# Each row is a vector of PC scores for one observation
output_pc_scores <- output %*%
  output.eigen$vectors[,1:240]

# Use eigen-vectors as basis for projecting PC scores onto original dimension (240 --> 1001)
recovered_output <- output.eigen$vectors[,1:240] %*% t(output_pc_scores)
str(recovered_output) # Each column is a curve
##  num [1:1001, 1:240] 0.00806 0.00814 0.00821 0.00826 0.00831 ...
# Recovered error
recovered_error <- mean(apply(t(recovered_output)-output,1,
                              function(x) sum(x^2)/1001))
recovered_error
## [1] 5.995411e-11
# display part of curves
par(mfrow = c(1,2))
plot(output[1,],ylab = 'amplitude',main = 'original curve 1')
plot(recovered_output[,1],ylab = 'amplitude',main = 'recovered curve 1')

plot(output[5,],ylab = 'amplitude',main = 'original curve 5')
plot(recovered_output[,5],ylab = 'amplitude',main = 'recovered curve 5')

1.3 Do Linear regression for fpca output

library(groupdata2) # package to split data into k fold
library(tidyverse)

# divide data into 5 folds
set.seed(1)
folds <- fold(data.frame(fd.input),5,method = 'n_dist')$.folds

# both vectors are generated to evalute the train and test error in 5-folds cross validation procedure
train_MSE <- rep(0,5) # train mean square error for each fold
test_MSE <- rep(0,5) # test mean square error for each fold

train_MAE <- rep(0,5) # train mean absolute error for each fold
test_MAE <- rep(0,5) # test mean absolute error for each fold

train_RMSE <- rep(0,5) # train root mean square error for each fold
test_RMSE <- rep(0,5) # test root mean square error for each fold

train_SE <- rep(0,5) # train sum of square residual for each fold
test_SE <- rep(0,5) # test sum of square residual for each fold


# 5-folds cross-validation
for(i in 1:5)
{
  train_set <- as_tibble(cbind(fd.input[folds !=i,],output_pc_scores[folds!=i,]))
  colnames(train_set)[1:244] <- c('lx','ly','h','phi',rep('coor',240))
  
  # 240 linear models used to predict 240 PC scores
  # the first 4 columns are input variables
  my_lms <- lapply(5:244,function(x)
    lm(coor ~ lx+ly+h+phi,data = train_set[,c(1:4,x)]))
  # row number is the number of train sets
  # col number is the number of PC scores
  train_num <- sum(folds!=i)
  test_num <- sum(folds ==i)
  # fitted PC scores
  fit_coor <- matrix(NA,nrow = train_num,ncol = 240)
  # predicted PC scores
  predict_coor <- matrix(NA,nrow = test_num,ncol = 240)
  for(j in 1:240)
  {
    fit_coor[,j] <- my_lms[[j]]$fitted.values
    predict_coor[,j] <- predict(my_lms[[j]],data.frame(lx = fd.input[folds ==i,1],
                                                       ly = fd.input[folds == i,2],
                                                       h = fd.input[folds == i,3],
                                                       phi = fd.input[folds == i,4]))
  }
  fit_curve <- output.eigen$vectors[,1:240] %*%
    t(fit_coor)
  train_MSE[i] <- mean(apply(t(fit_curve)-output[folds != i,],
                                 1,function(x) sum(x^2)/1001))
  train_MAE[i] <- mean(abs(t(fit_curve)-output[folds !=i,]))
  
  predict_curve <- output.eigen$vectors[,1:240] %*%
      t(predict_coor)
  test_MSE[i] <- mean(apply(t(predict_curve)-output[folds == i,],
                                1,function(x) sum(x^2)/1001))
  test_MAE[i] <- mean(abs(t(predict_curve)-output[folds ==i,]))
  
  # Root mean square error
  train_RMSE[i] <- sqrt(train_MSE[i])
  test_RMSE[i] <- sqrt(test_MSE[i])

  # Square error
  train_SE[i] <- sum(folds !=i)*train_MSE[i]
  test_SE[i] <- sum(folds == i)*test_MSE[i]
  
  # plot part of fitted and predicted curves
  par(mfrow = c(2,2))
  
}
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
par(mfrow = c(2,2))
plot(fit_curve[,1],ylab = 'amplitude',main = 'fitted curve 1',ylim = c(-0.15,0.15))
plot(output[which(folds!=i)[1],],ylab = 'amplitude',main = 'original curve 1',ylim = c(-0.15,0.15))
plot(predict_curve[,1],ylab = 'amplitude',main = 'fitted curve 5',ylim = c(-0.5,0.7))
plot(output[which(folds==i)[1],],ylab = 'amplitude',main = 'original curve 5',ylim = c(-0.5,0.7))

1.4 Error metrics

  • MSE - Mean square error

\[ \begin{gather} MSE = \frac{1}{np}\sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f_{ij}})^2 \end{gather} \]

  • RMSE - Root mean square error

\[ \begin{gather} RMSE = \sqrt{\frac{1}{np}\sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f_{ij}})^2} \end{gather} \]

  • SE - Residual sum of squares

\[ \begin{gather} SE = \frac{1}{p}\sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f_{ij}})^2 \end{gather} \]

  • MAE - Mean absolute error

\[ \begin{gather} MAE = \frac{1}{np}\sum_{1}^n\sum_{j=1}^{p}|f_{ij}-\hat{f_{ij}}| \end{gather} \]

  • MSE_v1 - Modified Mean square error

\[ \begin{gather} MSE_{v1} = \frac{1}{n}\sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f_{ij}})^2 \end{gather} \]

  • RMSE_v1 - Modified Root mean square error

\[ \begin{gather} RMSE_{v1} = \sqrt{\frac{1}{n}\sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f_{ij}})^2} \end{gather} \]

  • SE_v1 - Modified Residual sum of squares

\[ \begin{gather} SE_{v1} = \sum_{i=1}^n\sum_{j=1}^{p}(f_{ij}-\hat{f_{ij}})^2 \end{gather} \]

  • MAE_v1 - Modified Mean absolute error

\[ \begin{gather} MAE_{v1} = \frac{1}{n}\sum_{i=1}^n\sum_{j=1}^{p}|f_{ij}-\hat{f_{ij}}| \end{gather} \]

  • \(n\) is the number of curves, \(p\) is the curve dimension
  • \(f_{ij}\) and \(\hat{f}_{ij}\) are original curve \(i\) with \(j^{th}\) value and predicted curve \(i\) with \(j^{th}\) value
tibble(train_MSE,test_MSE,train_RMSE,test_RMSE,train_SE,test_SE,train_MAE,test_MAE)
## # A tibble: 5 × 8
##   train_MSE test_MSE train_RMSE test_RMSE train_SE test_SE train_MAE test_MAE
##       <dbl>    <dbl>      <dbl>     <dbl>    <dbl>   <dbl>     <dbl>    <dbl>
## 1    0.0180   0.0168      0.134     0.129     3.45   0.804    0.0970   0.0946
## 2    0.0177   0.0181      0.133     0.135     3.39   0.870    0.0963   0.0956
## 3    0.0168   0.0218      0.129     0.148     3.22   1.05     0.0935   0.104 
## 4    0.0182   0.0158      0.135     0.126     3.50   0.758    0.0974   0.0917
## 5    0.0171   0.0205      0.131     0.143     3.29   0.984    0.0939   0.105
tibble(train_MSE_v1 = 1001*train_MSE,test_MSE_v1 = 1001*test_MSE,
       train_RMSE_v1 = sqrt(1001)*train_RMSE,
       test_RMSE_v1 = sqrt(1001)*test_RMSE,
       train_SE_v1 = 1001*train_SE,
       test_SE_v1 = 1001*test_SE,
       train_MAE_v1 = 1001*train_MAE,
       test_MAE_v1 = 1001*test_MAE) %>% print(n = Inf)
## # A tibble: 5 × 8
##   train_MSE_v1 test_MSE_v1 train_RMSE_v1 test_RMSE_v1 train_SE_v1 test_SE_v1
##          <dbl>       <dbl>         <dbl>        <dbl>       <dbl>      <dbl>
## 1         18.0        16.8          4.24         4.09       3453.       805.
## 2         17.7        18.1          4.21         4.26       3398.       871.
## 3         16.8        21.8          4.10         4.67       3222.      1047.
## 4         18.2        15.8          4.27         3.98       3499.       759.
## 5         17.2        20.5          4.14         4.53       3294.       985.
##   train_MAE_v1 test_MAE_v1
##          <dbl>       <dbl>
## 1         97.1        94.7
## 2         96.4        95.7
## 3         93.6       104. 
## 4         97.5        91.8
## 5         94.0       105.

1.5 Design of experiment using neural networks

In this part we will use different architectures to exploit the performance of neural networks in predicting functional data;

  • We use \(B\) to represent batch normalization layer
  • We use \(A\) to represent fully-connected layer that has activation function

1.5.1 relu+act2 (500 epochs)

## # A tibble: 40 × 12
##     unit act         optimizer           trainRMSE_v1 testRMSE_v1 trainMAE_v1
##    <dbl> <fct>       <chr>                      <dbl>       <dbl>       <dbl>
##  1    32 relu        optimizer_adam()            1.97        2.33        38.3
##  2    64 relu        optimizer_adam()            1.65        2.09        29.7
##  3   128 relu        optimizer_adam()            1.54        1.91        23.4
##  4   256 relu        optimizer_adam()            1.47        1.80        19.4
##  5   512 relu        optimizer_adam()            1.43        1.77        16.9
##  6    32 exponential optimizer_adam()            1.99        2.29        38.8
##  7    64 exponential optimizer_adam()            1.80        2.18        33.6
##  8   128 exponential optimizer_adam()            1.64        1.99        28.4
##  9   256 exponential optimizer_adam()            1.47        1.84        23.9
## 10   512 exponential optimizer_adam()            1.45        1.83        21.6
## 11    32 relu        optimizer_adamax()          2.04        2.33        40.4
## 12    64 relu        optimizer_adamax()          1.76        2.13        32.9
## 13   128 relu        optimizer_adamax()          1.55        1.94        25.5
## 14   256 relu        optimizer_adamax()          1.47        1.83        20.3
## 15   512 relu        optimizer_adamax()          1.43        1.80        17.8
## 16    32 exponential optimizer_adamax()          2.15        2.43        42.8
## 17    64 exponential optimizer_adamax()          1.95        2.25        38.0
## 18   128 exponential optimizer_adamax()          1.77        2.13        33.2
## 19   256 exponential optimizer_adamax()          1.58        1.94        27.8
## 20   512 exponential optimizer_adamax()          1.51        1.92        24.8
## 21    32 relu        optimizer_nadam()           1.75        2.09        33.1
## 22    64 relu        optimizer_nadam()           1.53        1.89        25.8
## 23   128 relu        optimizer_nadam()           1.45        1.80        21.4
## 24   256 relu        optimizer_nadam()           1.42        1.80        19.8
## 25   512 relu        optimizer_nadam()           1.38        1.72        17.7
## 26    32 exponential optimizer_nadam()           1.87        2.24        34.2
## 27    64 exponential optimizer_nadam()           1.66        1.98        29.3
## 28   128 exponential optimizer_nadam()           1.53        1.87        25.8
## 29   256 exponential optimizer_nadam()           1.43        1.78        23.0
## 30   512 exponential optimizer_nadam()           1.45        1.80        23.0
## 31    32 relu        optimizer_rmsprop()         1.96        2.27        39.0
## 32    64 relu        optimizer_rmsprop()         1.71        2.06        31.5
## 33   128 relu        optimizer_rmsprop()         1.57        1.93        26.7
## 34   256 relu        optimizer_rmsprop()         1.51        1.86        24.1
## 35   512 relu        optimizer_rmsprop()         1.47        1.81        23.0
## 36    32 exponential optimizer_rmsprop()         1.96        2.26        38.0
## 37    64 exponential optimizer_rmsprop()         1.80        2.10        34.6
## 38   128 exponential optimizer_rmsprop()         1.60        1.95        28.8
## 39   256 exponential optimizer_rmsprop()         1.50        1.80        24.4
## 40   512 exponential optimizer_rmsprop()         1.51        1.88        22.4
##    testMAE_v1  nPara smoothed_trainRMSE_v1 smoothed_testRMSE_v1
##         <dbl>  <int>                 <dbl>                <dbl>
##  1       47.1   9136                  1.97                 2.33
##  2       41.4  20080                  1.65                 2.08
##  3       36.7  48112                  1.52                 1.88
##  4       33.9 128752                  1.45                 1.76
##  5       32.1 388336                  1.40                 1.73
##  6       45.6   9136                  1.97                 2.27
##  7       42.5  20080                  1.76                 2.15
##  8       38.5  48112                  1.60                 1.95
##  9       35.0 128752                  1.43                 1.80
## 10       33.8 388336                  1.41                 1.79
## 11       47.8   9136                  2.04                 2.33
## 12       42.4  20080                  1.76                 2.11
## 13       37.9  48112                  1.53                 1.91
## 14       34.6 128752                  1.45                 1.80
## 15       33.2 388336                  1.40                 1.76
## 16       48.9   9136                  2.13                 2.40
## 17       45.0  20080                  1.92                 2.22
## 18       41.9  48112                  1.73                 2.08
## 19       37.8 128752                  1.54                 1.89
## 20       36.7 388336                  1.47                 1.88
## 21       43.2   9136                  1.75                 2.08
## 22       37.3  20080                  1.52                 1.87
## 23       34.5  48112                  1.44                 1.78
## 24       34.1 128752                  1.40                 1.78
## 25       32.1 388336                  1.37                 1.69
## 26       43.9   9136                  1.85                 2.22
## 27       38.8  20080                  1.64                 1.95
## 28       35.3  48112                  1.49                 1.82
## 29       33.6 128752                  1.39                 1.74
## 30       34.1 388336                  1.41                 1.76
## 31       46.6   9136                  1.95                 2.26
## 32       41.2  20080                  1.68                 2.03
## 33       38.0  48112                  1.53                 1.88
## 34       36.7 128752                  1.47                 1.81
## 35       35.1 388336                  1.44                 1.77
## 36       45.4   9136                  1.94                 2.23
## 37       42.3  20080                  1.75                 2.04
## 38       38.0  48112                  1.53                 1.89
## 39       34.6 128752                  1.44                 1.74
## 40       37.0 388336                  1.49                 1.86
##    smoothed_testMAE_v1 smoothed_trainMAE_v1
##                  <dbl>                <dbl>
##  1                47.0                 38.3
##  2                41.2                 29.5
##  3                36.2                 23.0
##  4                33.2                 18.9
##  5                31.2                 16.2
##  6                45.2                 38.4
##  7                41.8                 32.9
##  8                37.5                 27.5
##  9                34.1                 23.0
## 10                32.8                 20.4
## 11                47.7                 40.4
## 12                42.1                 32.6
## 13                37.3                 25.1
## 14                33.7                 19.6
## 15                32.3                 17.1
## 16                48.4                 42.4
## 17                44.3                 37.4
## 18                41.1                 32.3
## 19                36.8                 26.8
## 20                35.8                 23.7
## 21                43.0                 32.9
## 22                36.9                 25.4
## 23                34.0                 20.9
## 24                33.5                 19.3
## 25                31.6                 17.4
## 26                43.5                 33.8
## 27                38.2                 28.7
## 28                34.3                 24.6
## 29                32.8                 21.7
## 30                33.2                 21.8
## 31                46.3                 38.7
## 32                40.6                 30.8
## 33                37.0                 25.5
## 34                35.7                 23.0
## 35                34.2                 21.8
## 36                44.9                 37.5
## 37                41.3                 33.5
## 38                36.7                 27.3
## 39                33.4                 22.8
## 40                36.4                 21.8

1.5.2 relu+batch norm+act2 (500 epochs)

## # A tibble: 40 × 12
##     unit act         optimizer           trainRMSE_v1 testRMSE_v1 trainMAE_v1
##    <dbl> <fct>       <chr>                      <dbl>       <dbl>       <dbl>
##  1    32 relu        optimizer_adam()            1.78        2.13        34.8
##  2    64 relu        optimizer_adam()            1.55        1.89        27.8
##  3   128 relu        optimizer_adam()            1.45        1.85        23.8
##  4   256 relu        optimizer_adam()            1.46        1.86        23.0
##  5   512 relu        optimizer_adam()            1.48        1.83        23.9
##  6    32 exponential optimizer_adam()            2.08        2.39        43.2
##  7    64 exponential optimizer_adam()            1.90        2.32        38.3
##  8   128 exponential optimizer_adam()            1.75        2.22        34.0
##  9   256 exponential optimizer_adam()            1.77        2.25        34.6
## 10   512 exponential optimizer_adam()            1.76        2.27        34.3
## 11    32 relu        optimizer_adamax()          1.89        2.19        37.7
## 12    64 relu        optimizer_adamax()          1.61        2.00        29.8
## 13   128 relu        optimizer_adamax()          1.51        1.90        25.3
## 14   256 relu        optimizer_adamax()          1.46        1.84        23.4
## 15   512 relu        optimizer_adamax()          1.51        1.88        24.6
## 16    32 exponential optimizer_adamax()          2.39        2.73        50.7
## 17    64 exponential optimizer_adamax()          2.20        2.54        45.7
## 18   128 exponential optimizer_adamax()          2.04        2.49        41.3
## 19   256 exponential optimizer_adamax()          1.91        2.44        37.7
## 20   512 exponential optimizer_adamax()          1.83        2.38        35.5
## 21    32 relu        optimizer_nadam()           1.68        2.05        32.1
## 22    64 relu        optimizer_nadam()           1.48        1.85        25.9
## 23   128 relu        optimizer_nadam()           1.43        1.79        23.5
## 24   256 relu        optimizer_nadam()           1.52        1.83        25.6
## 25   512 relu        optimizer_nadam()           1.53        1.87        25.7
## 26    32 exponential optimizer_nadam()           1.91        2.37        38.6
## 27    64 exponential optimizer_nadam()           1.80        2.20        35.6
## 28   128 exponential optimizer_nadam()           1.77        2.27        35.2
## 29   256 exponential optimizer_nadam()           1.74        2.10        34.4
## 30   512 exponential optimizer_nadam()           1.80        2.12        35.4
## 31    32 relu        optimizer_rmsprop()         1.86        2.19        37.0
## 32    64 relu        optimizer_rmsprop()         1.55        1.96        28.9
## 33   128 relu        optimizer_rmsprop()         1.54        1.89        27.2
## 34   256 relu        optimizer_rmsprop()         1.50        1.90        25.8
## 35   512 relu        optimizer_rmsprop()         1.67        1.97        30.3
## 36    32 exponential optimizer_rmsprop()         1.88        2.27        38.7
## 37    64 exponential optimizer_rmsprop()         1.84        2.17        36.5
## 38   128 exponential optimizer_rmsprop()         1.81        2.20        36.3
## 39   256 exponential optimizer_rmsprop()         1.76        2.10        34.8
## 40   512 exponential optimizer_rmsprop()         1.81        2.15        33.5
##    testMAE_v1  nPara smoothed_trainRMSE_v1 smoothed_testRMSE_v1
##         <dbl>  <int>                 <dbl>                <dbl>
##  1       44.3   9264                  1.78                 2.13
##  2       38.0  20336                  1.54                 1.88
##  3       35.9  48624                  1.44                 1.82
##  4       36.4 129776                  1.43                 1.82
##  5       36.0 390384                  1.45                 1.77
##  6       50.8   9264                  2.06                 2.37
##  7       48.8  20336                  1.86                 2.27
##  8       46.4  48624                  1.67                 2.13
##  9       47.6 129776                  1.65                 2.11
## 10       48.2 390384                  1.61                 2.10
## 11       45.9   9264                  1.89                 2.19
## 12       40.6  20336                  1.60                 1.98
## 13       37.9  48624                  1.48                 1.86
## 14       36.0 129776                  1.41                 1.77
## 15       37.6 390384                  1.44                 1.80
## 16       58.3   9264                  2.34                 2.67
## 17       53.5  20336                  2.12                 2.44
## 18       52.4  48624                  1.92                 2.35
## 19       51.6 129776                  1.74                 2.23
## 20       50.4 390384                  1.65                 2.15
## 21       42.1   9264                  1.68                 2.04
## 22       36.8  20336                  1.47                 1.84
## 23       35.1  48624                  1.42                 1.77
## 24       36.9 129776                  1.50                 1.80
## 25       37.2 390384                  1.50                 1.83
## 26       49.5   9264                  1.90                 2.35
## 27       46.2  20336                  1.75                 2.15
## 28       47.3  48624                  1.68                 2.18
## 29       44.5 129776                  1.67                 2.03
## 30       44.6 390384                  1.74                 2.05
## 31       45.1   9264                  1.85                 2.18
## 32       39.8  20336                  1.53                 1.93
## 33       37.9  48624                  1.51                 1.85
## 34       38.4 129776                  1.46                 1.86
## 35       40.1 390384                  1.62                 1.91
## 36       47.5   9264                  1.84                 2.23
## 37       44.9  20336                  1.77                 2.11
## 38       46.0  48624                  1.68                 2.07
## 39       44.0 129776                  1.69                 2.03
## 40       44.5 390384                  1.75                 2.09
##    smoothed_testMAE_v1 smoothed_trainMAE_v1
##                  <dbl>                <dbl>
##  1                44.3                 34.8
##  2                37.8                 27.6
##  3                35.4                 23.4
##  4                35.5                 22.1
##  5                34.7                 22.7
##  6                50.3                 42.8
##  7                47.7                 37.3
##  8                44.4                 32.3
##  9                44.7                 32.1
## 10                44.7                 30.9
## 11                45.9                 37.7
## 12                40.3                 29.5
## 13                37.0                 24.5
## 14                34.4                 22.1
## 15                35.6                 22.7
## 16                57.0                 49.5
## 17                51.3                 43.9
## 18                49.5                 38.8
## 19                47.4                 34.4
## 20                45.6                 31.5
## 21                41.9                 31.9
## 22                36.6                 25.6
## 23                34.7                 23.0
## 24                36.3                 24.9
## 25                36.3                 24.9
## 26                49.1                 38.3
## 27                45.1                 34.5
## 28                45.3                 33.1
## 29                42.9                 32.7
## 30                43.2                 34.0
## 31                44.8                 36.7
## 32                39.3                 28.2
## 33                37.0                 26.1
## 34                37.5                 24.5
## 35                38.9                 28.8
## 36                46.7                 37.8
## 37                43.4                 34.7
## 38                43.5                 33.3
## 39                42.4                 32.8
## 40                43.1                 31.8

2. Data 2

2.1 Read data2 input and output

# Observations
fd.input <- readRDS('data2/fd_input.rds') # matrix
fd.output <- readRDS('data2/fd_output.rds') # data.frame
ref_amp <- readRDS('data2/ref_amp.rds') # matrix
trans_amp <- readRDS('data2/trans_amp.rds') # matrix


str(fd.input)
##  num [1:500, 1:4] 0.891 0.705 0.779 0.207 0.097 0.018 0.182 0.478 0.629 0.408 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:4] "a" "b" "tBCB" "r_MDA"
dim(ref_amp) # 500 curves, each row is a curve
## [1]  500 1001
dim(trans_amp) # 500 curves, each row is a curve
## [1]  500 1001

2.2 FPCA for data2 (Use eigen-decomposition procedure in doing FPCA)

output <- ref_amp # choose reflection amplitude curves as output

# covariance matrix
output_cov <- cov(output)
# eigen-decomposition
output.eigen <- eigen(output_cov)

# PC scores (Train and test output)
# Each row is a vector of PC scores for one observation
output_pc_scores <- output %*%
  output.eigen$vectors[,1:500]

# Use eigen-vectors as basis for projecting PC scores onto original dimension (240 --> 1001)
recovered_output <- output.eigen$vectors[,1:500] %*% t(output_pc_scores)
str(recovered_output) # Each column is a curve
##  num [1:1001, 1:500] 0.958 0.958 0.958 0.958 0.958 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:500] "Abs.s11.1.." "Abs.s11.2.." "Abs.s11.3.." "Abs.s11.4.." ...
# Recovered error
recovered_error <- mean(apply(t(recovered_output)-output,1,
                              function(x) sum(x^2)/1001))
recovered_error
## [1] 1.337268e-09
# display part of curves
par(mfrow = c(1,2))
plot(output[1,],ylab = 'amplitude',main = 'original curve 1',ylim = c(0,1))
plot(recovered_output[,1],ylab = 'reflection amplitude',main = 'recovered curve 1',ylim =c(0,1))

plot(output[4,],ylab = 'amplitude',main = 'original curve 4',ylim =c(0,1))
plot(recovered_output[,4],ylab = 'reflection amplitude',main = 'recovered curve 4',ylim = c(0,1))

2.3 Do Linear regression for fpca output

library(groupdata2) # package to split data into k fold
library(tidyverse)

# divide data into 5 folds
set.seed(1)
folds <- fold(data.frame(fd.input),5,method = 'n_dist')$.folds

# both vectors are generated to evalute the train and test error in 5-folds cross validation procedure
train_MSE <- rep(0,5) # train mean square error for each fold
test_MSE <- rep(0,5) # test mean square error for each fold

train_MAE <- rep(0,5) # train mean absolute error for each fold
test_MAE <- rep(0,5) # test mean absolute error for each fold

train_RMSE <- rep(0,5) # train root mean square error for each fold
test_RMSE <- rep(0,5) # test root mean square error for each fold

train_SE <- rep(0,5) # train sum of square residual for each fold
test_SE <- rep(0,5) # test sum of square residual for each fold

# 5-folds cross-validation
for(i in 1:5)
{
  # the first 4 variables are 'a,b,tBCB,r_MDA"
  train_set <- as_tibble(cbind(fd.input[folds !=i,],output_pc_scores[folds!=i,]))
  colnames(train_set)[5:504] <- rep('coor',500)
  
  
  # 240 linear models used to predict 240 PC scores
  # the first 4 columns are input variables
  my_lms <- lapply(5:504,function(x)
    lm(coor ~ a+b+tBCB+r_MDA,data = train_set[,c(1:4,x)]))
  # row number is the number of train sets
  # col number is the number of PC scores
  train_num <- sum(folds!=i)
  test_num <- sum(folds ==i)
  # fitted PC scores
  fit_coor <- matrix(NA,nrow = train_num,ncol = 500)
  # predicted PC scores
  predict_coor <- matrix(NA,nrow = test_num,ncol = 500)
  for(j in 1:500)
  {
    fit_coor[,j] <- my_lms[[j]]$fitted.values
    predict_coor[,j] <- predict(my_lms[[j]],data.frame(a = fd.input[folds ==i,1],
                                                       b = fd.input[folds == i,2],
                                                       tBCB = fd.input[folds == i,3],
                                                       r_MDA = fd.input[folds == i,4]))
  }
  fit_curve <- output.eigen$vectors[,1:500] %*%
    t(fit_coor)
  train_MSE[i] <- mean(apply(t(fit_curve)-output[folds != i,],
                                 1,function(x) sum(x^2)/1001))
  train_MAE[i] <- mean(abs(t(fit_curve)-output[folds !=i,]))
  
  predict_curve <- output.eigen$vectors[,1:500] %*%
      t(predict_coor)
  test_MSE[i] <- mean(apply(t(predict_curve)-output[folds == i,],
                                1,function(x) sum(x^2)/1001))
  test_MAE[i] <- mean(abs(t(predict_curve)-output[folds ==i,]))
  
    # Root mean square error
  train_RMSE[i] <- sqrt(train_MSE[i])
  test_RMSE[i] <- sqrt(test_MSE[i])

  # Square error
  train_SE[i] <- sum(folds !=i)*train_MSE[i]
  test_SE[i] <- sum(folds == i)*test_MSE[i]
  
  # plot part of fitted and predicted curves
  par(mfrow = c(2,2))
  
}
par(mfrow = c(2,2))
plot(fit_curve[,1],ylab = 'amplitude',main = 'fitted curve 1',ylim = c(0,1))
plot(output[which(folds!=i)[1],],ylab = 'reflection amplitude',main = 'original curve 1',ylim= c(0,1))
plot(predict_curve[,1],ylab = 'amplitude',main = 'fitted curve 4',ylim = c(0,1))
plot(output[which(folds==i)[1],],ylab = 'reflection amplitude',main = 'original curve 4',ylim = c(0,1))

2.4 Error metrics

  • MSE - Mean square error
  • RMSE - Root mean square error
  • SE - Residual sum of squares
  • MAE - Mean absolute error
  • MSE_v1 - Modified Mean square error
  • RMSE_v1 - Modified Root mean square error
  • SE_v1 - Modified Residual sum of squares
  • MAE_v1 - Modified Mean absolute error
tibble(train_MSE,test_MSE,train_RMSE,test_RMSE,train_SE,test_SE,train_MAE,test_MAE)
## # A tibble: 5 × 8
##   train_MSE test_MSE train_RMSE test_RMSE train_SE test_SE train_MAE test_MAE
##       <dbl>    <dbl>      <dbl>     <dbl>    <dbl>   <dbl>     <dbl>    <dbl>
## 1   0.00417  0.00446     0.0646    0.0668     1.67   0.446    0.0403   0.0443
## 2   0.00435  0.00373     0.0660    0.0611     1.74   0.373    0.0416   0.0388
## 3   0.00424  0.00425     0.0651    0.0652     1.70   0.425    0.0408   0.0417
## 4   0.00416  0.00445     0.0645    0.0667     1.66   0.445    0.0408   0.0408
## 5   0.00407  0.00486     0.0638    0.0697     1.63   0.486    0.0405   0.0418
tibble(train_MSE_v1 = 1001*train_MSE,test_MSE_v1 = 1001*test_MSE,
       train_RMSE_v1 = sqrt(1001)*train_RMSE,
       test_RMSE_v1 = sqrt(1001)*test_RMSE,
       train_SE_v1 = 1001*train_SE,
       test_SE_v1 = 1001*test_SE,
       train_MAE_v1 = 1001*train_MAE,
       test_MAE_v1 = 1001*test_MAE)
## # A tibble: 5 × 8
##   train_MSE_v1 test_MSE_v1 train_RMSE_v1 test_RMSE_v1 train_SE_v1 test_SE_v1
##          <dbl>       <dbl>         <dbl>        <dbl>       <dbl>      <dbl>
## 1         4.17        4.46          2.04         2.11       1670.       446.
## 2         4.36        3.74          2.09         1.93       1742.       374.
## 3         4.24        4.26          2.06         2.06       1697.       426.
## 4         4.17        4.45          2.04         2.11       1666.       445.
## 5         4.07        4.87          2.02         2.21       1628.       487.
##   train_MAE_v1 test_MAE_v1
##          <dbl>       <dbl>
## 1         40.3        44.3
## 2         41.6        38.9
## 3         40.8        41.8
## 4         40.9        40.9
## 5         40.6        41.8