create_datafile<-function(filepath,pattern,name){
  results=c()
  lm_files=list.files(filepath,pattern)
  for (file_id in 1:length(lm_files)){
    results_file=read.csv(paste(filepath,lm_files[file_id],sep=""),header=T)
    results_file=cbind(
      rep(unlist(strsplit(lm_files[file_id],".txt"))[1],nrow(results_file)),
      rep(name,nrow(results_file)),
      results_file)
    names(results_file)[1]="imu"
    names(results_file)[2]="model"
    results=rbind(results,cbind(results_file)
    )
  }
  return(results)
}

Read https://followthedata.wordpress.com/2012/06/02/practical-advice-for-machine-learning-bias-variance/

MLP: selecting best model

#results=create_datafile(paste(results_dir,"/",sep=""),".*_lm.*","mlr")
#results=rbind(results,create_datafile(paste(results_dir,"/",sep=""),".*_ma.*","ma"))
results=c()
results=rbind(results,create_datafile(paste(results_dir,"/data/",sep=""),"*mlp_.*node_5.*","mlp5"))
results=rbind(results,create_datafile(paste(results_dir,"/data/",sep=""),"*mlp_.*node_10.*","mlp10"))
results=rbind(results,create_datafile(paste(results_dir,"/data/",sep=""),"*mlp_.*node_20.*","mlp20"))   
results=rbind(results,create_datafile(paste(results_dir,"/data/",sep=""),"*mlp_.*node_30.*","mlp30"))   
results=rbind(results,create_datafile(paste(results_dir,"/data/",sep=""),"*mlp_.*node_40.*","mlp40"))   
results=rbind(results,create_datafile(paste(results_dir,"/data/",sep=""),"*mlp_.*node_60.*","mlp60"))   
results=rbind(results,create_datafile(paste(results_dir,"/data/",sep=""),"*mlp_.*node_80.*","mlp80"))  
results=rbind(results,create_datafile(paste(results_dir,"/data/",sep=""),"*mlp_.*node_100.*","mlp100"))  
results=rbind(results,create_datafile(paste(results_dir,"/data/",sep=""),"*lm_.*","mlr"))   
results=rbind(results,create_datafile(paste(results_dir,"/data/",sep=""),"*ma_.*","ma"))   
xyplot(X1+X2+X6~timedelay|imu,data=results,group=model,type='l', auto.key = T,scales=list(relation='free',x=list(cex=0.7)),cex=0.3,layout=c(2,1))

Boxplot along all the lag length

XSNES IMU

bw_data=group_by(results,imu,model,imu,timedelay) %>% select(X1,X2,X6,timedelay) %>% filter(imu=='glad_xsns1_target')
## Adding missing grouping variables: `imu`, `model`
bwplot(X1~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)),do.out = T,cex=0.1,
       cex.labels=0.1,alpha=0.6,varname.cex=0.1,box.width=1/2,layout=c(length(levels(bw_data$model)),1))

bwplot(X2~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)),do.out = T,cex=0.1,
        cex.labels=0.1,alpha=0.6,varname.cex=0.1,layout=c(length(levels(bw_data$model)),1))

bwplot(X6~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)) ,do.out = T,cex=0.1,
        cex.labels=0.1,alpha=0.6,varname.cex=0.1,layout=c(length(levels(bw_data$model)),1))

Boxplot along all the lag length

XBOW IMU

bw_data=group_by(results,imu,model,imu,timedelay) %>% select(X1,X2,X6,timedelay) %>% filter(imu=='glad_xbow1_target')
## Adding missing grouping variables: `imu`, `model`
bwplot(X1~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)),do.out = T,cex=0.1,
       cex.labels=0.1,alpha=0.6,varname.cex=0.1,box.width=1/2,layout=c(length(levels(bw_data$model)),1))

bwplot(X2~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)),do.out = T,cex=0.1,
        cex.labels=0.1,alpha=0.6,varname.cex=0.1,layout=c(length(levels(bw_data$model)),1))

bwplot(X6~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)) ,do.out = T,cex=0.1,
        cex.labels=0.1,alpha=0.6,varname.cex=0.1,layout=c(length(levels(bw_data$model)),1))

Boxplot along all the lag length

GLADIATOR IMU

bw_data=group_by(results,imu,model,imu,timedelay) %>% select(X1,X2,X6,timedelay) %>% filter(imu=='glad_data')
## Adding missing grouping variables: `imu`, `model`
bwplot(X1~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)),do.out = T,cex=0.1,
       cex.labels=0.1,alpha=0.6,varname.cex=0.1,box.width=1/2,layout=c(length(levels(bw_data$model)),1))

bwplot(X2~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)),do.out = T,cex=0.1,
        cex.labels=0.1,alpha=0.6,varname.cex=0.1,layout=c(length(levels(bw_data$model)),1))

bwplot(X6~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)) ,do.out = T,cex=0.1,
        cex.labels=0.1,alpha=0.6,varname.cex=0.1,layout=c(length(levels(bw_data$model)),1))

Boxplot along all the lag length

XTAL IMU

bw_data=group_by(results,imu,model,imu,timedelay) %>% select(X1,X2,X6,timedelay) %>% filter(imu=='glad_xtal_target')
## Adding missing grouping variables: `imu`, `model`
bwplot(X1~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)),do.out = T,cex=0.1,
       cex.labels=0.1,alpha=0.6,varname.cex=0.1,box.width=1/2,layout=c(length(levels(bw_data$model)),1))

bwplot(X2~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)),do.out = T,cex=0.1,
        cex.labels=0.1,alpha=0.6,varname.cex=0.1,layout=c(length(levels(bw_data$model)),1))

bwplot(X6~factor(timedelay)|model,data=bw_data,scales=list(relation='same',x=list(cex=0.5)) ,do.out = T,cex=0.1,
        cex.labels=0.1,alpha=0.6,varname.cex=0.1,layout=c(length(levels(bw_data$model)),1))

Boxplot of the X1 sensor model with the less RMSE on the 10-folds CV

# Select the models with the smallest RMSE
best_models=group_by(results,model,imu,timedelay) %>% summarise(mean=mean(X1),sd=sd(X1)) %>% filter(mean==min(mean)) %>% arrange(desc(imu))
# adding the desaggregated lag info
bw_data=inner_join(best_models,results, by=c("timedelay","imu","model"))
bwplot(X1~model|imu,groups=timedelay,data=bw_data,scale='free'
       ,auto.key = F
       ,panel = panel.superpose
       ,panel.groups=function(x,y,group.number,...){
         xt <- x[x==min(x)] # find latest year
         yt <- y[y==min(y)] # find value at latest year 
         panel.text(x,yt,labels=levels(factor(bw_data$timedelay))[group.number],pos=1,...)
         panel.bwplot(x,y,...)

             },ylab="X1"
      
         )

Boxplot of the X2 sensor model with the less RMSE on the 10-folds CV

# Select the models with the smallest RMSE
best_models=group_by(results,model,imu,timedelay) %>% summarise(mean=mean(X2),sd=sd(X2)) %>% filter(mean==min(mean)) %>% arrange(desc(imu))
# adding the desaggregated lag info
bw_data=inner_join(best_models,results, by=c("timedelay","imu","model"))
bwplot(X2~model|imu,groups=timedelay,data=bw_data,scale='free'
       ,auto.key = F
       ,panel = panel.superpose
       ,panel.groups=function(x,y,group.number,...){
         xt <- x[x==min(x)] # find latest year
         yt <- y[y==min(y)] # find value at latest year 
         panel.text(x,yt,labels=levels(factor(bw_data$timedelay))[group.number],pos=1,...)
         panel.bwplot(x,y,...)

             },ylab="X2"
      
         )

Boxplot of the X6 sensor model with the less RMSE on the 10-folds CV

# Select the models with the smallest RMSE
best_models=group_by(results,model,imu,timedelay) %>% summarise(mean=mean(X6),sd=sd(X6)) %>% filter(mean==min(mean)) %>% arrange(desc(imu))
# adding the desaggregated lag info
bw_data=inner_join(best_models,results, by=c("timedelay","imu","model"))
bwplot(X6~model|imu,groups=timedelay,data=bw_data,scale='free'
       ,auto.key = F
       ,panel = panel.superpose
       ,panel.groups=function(x,y,group.number,...){
         xt <- x[x==min(x)] # find latest year
         yt <- y[y==min(y)] # find value at latest year 
         panel.text(x,yt,labels=levels(factor(bw_data$timedelay))[group.number],pos=1,...)
         panel.bwplot(x,y,...)

             },ylab="X6"
      
         )