Outline

data

1. ERPdata : maybe a formal data for package

preprocessing

1. list_to_df

2. data_select

3. data_summarize

4. downsample

EDA

1.edaplot


Data

1. ERPdata
2. NIRSdata
library(ERP);require(mnormt);require(fdrtool);library(ggplot2);library(dplyr)
library(erpR);require(akima);library(reshape2);library(boot)
data(ERPsets)
load("ERPdata.RData")

preprocessing

1. list_to_df
2. data_select
3. data_summarize

###### 4. Down sample

list_to_df
It is a function that transforms a list into a data frame for use with our package.
###list2data
# list_to_df: stack a named list of data frames into one wide data frame.
#
# Arguments:
#   list_data  non-empty, named list of data frames. Each element gets a
#              `frames` column and a `list.name` column (its name in the
#              list) before being reshaped.
#   frames     vector of frame labels attached to every element as the
#              `frames` column; it becomes the wide-format time variable.
#
# Returns a data frame with one row per (list element, original column):
# column 1 is `list.name`, column 2 is `Channel` (the melted `variable`
# column, renamed), followed by the wide `value.<frame>` columns produced
# by reshape(). Row names are reset to 1..n.
list_to_df <- function(list_data,frames){
        if (!is.list(list_data) || length(list_data) == 0) {
                stop("list_data should be a non-empty list of data frames")
        }
        if (is.null(names(list_data))) {
                stop("list_data should be a named list")
        }
        # Tag every element with its frame labels and its own list name.
        tagged <- Map(function(element, element_name) {
                element$frames <- frames
                element$list.name <- element_name
                element
        }, list_data, names(list_data))
        # Long format first, then one row per (variable, list.name).
        long_list <- lapply(tagged, melt, id = c("frames", "list.name"))
        wide_list <- lapply(long_list, reshape,
                            timevar = "frames",
                            idvar = c("variable", "list.name"),
                            direction = "wide")
        # Stack all elements of the *argument* (not a global object); this
        # also works for a single-element list. unname() keeps rbind from
        # treating list names as argument names.
        Ana_data <- do.call(rbind, unname(wide_list))
        colnames(Ana_data)[2] <- "Channel"
        rownames(Ana_data) <- seq_len(nrow(Ana_data))
        Ana_data
}
# Stack the ERPsets list into one wide data frame, labelling frames 1..426.
ERP_df <- list_to_df(
        list_data = ERPsets,
        frames = 1:426
)
# AddExpCondSub is defined outside this section. Judging by the printed
# preview it appends Experiment/Condition columns derived from the list
# name held in column 1 -- TODO confirm against its definition.
ERP_df <- AddExpCondSub(
        data = ERP_df,
        list.name_col = 1
)
# Optional, depending on the user's need: remove the list name column.
#ERP_df <- ERP_df[,-1]
# Preview the first three rows: the leading columns plus columns 427-430.
head(ERP_df[, c(1:3, 427:430)], 3)
        list.name Channel     value.1 value.425 value.426 Experiment
1 Exp1_word_subj1     Fp1  0.36995740 14.160980  9.971283       Exp1
2 Exp1_word_subj1     Fp2 -0.02759907 13.750350  9.678596       Exp1
3 Exp1_word_subj1      F3  0.42971300  6.269233  4.413941       Exp1
  Condition
1      word
2      word
3      word
2. data_select
It selects data by subject, by channel, or by both.
# data_select: keep only the rows for the requested subjects and/or channels.
#
# Arguments:
#   data         data frame to filter.
#   frames, datacol, othvarcol, ...
#                accepted so that all functions in this file share one
#                calling convention; they are not used by the selection.
#   subjcol      column index of the subject variable (needed when
#                select_subj is given).
#   chancol      column index of the channel variable (needed when
#                select_chan is given).
#   select_subj  subjects to keep, or NULL for no subject filter.
#   select_chan  channels to keep, or NULL for no channel filter.
#
# Returns the filtered data frame. The subject filter is applied first,
# then the channel filter. After each filter the rows are grouped in the
# order of the requested values and row names are reset to 1..n. When both
# selections are NULL, `data` is returned untouched. A selection that
# matches nothing yields a zero-row data frame instead of an error.
data_select <- function(data,frames,datacol,subjcol=NULL,chancol=NULL,othvarcol=NULL,
                        select_subj=NULL,
                        select_chan=NULL,...){
        if (!is.null(select_subj) && is.null(subjcol)) {
                stop("subjcol is required when select_subj is given")
        }
        if (!is.null(select_chan) && is.null(chancol)) {
                stop("chancol is required when select_chan is given")
        }
        # Rows whose `col` equals one of `keys`, one group of rows per key.
        pick_rows <- function(dta, col, keys) {
                pieces <- lapply(keys, function(key) {
                        hit <- dta[, col] == key
                        # rows where the comparison is NA are dropped
                        dta[!is.na(hit) & hit, , drop = FALSE]
                })
                picked <- do.call(rbind, pieces)
                # seq_len() copes with an empty result, unlike 1:nrow()
                rownames(picked) <- seq_len(nrow(picked))
                picked
        }
        dta <- data
        if (!is.null(select_subj)) {
                dta <- pick_rows(dta, subjcol, select_subj)
        }
        if (!is.null(select_chan)) {
                dta <- pick_rows(dta, chancol, select_chan)
        }
        dta
}
# Joint selection: subjects subj1/subj2 restricted to channels CZ and Fp1.
S1S10Fp1Fp2 <- data_select(
        data = ERPdata,
        frames = 1:426,
        datacol = 2:427,
        subjcol = 430,
        chancol = 1,
        othvarcol = c(428:429, 431:432),
        select_subj = c("subj1", "subj2"),
        select_chan = c("CZ", "Fp1")
)
# Channel-only selection: every subject, channel CZ.
CZ <- data_select(
        data = ERPdata,
        frames = 1:426,
        datacol = 2:427,
        subjcol = 430,
        chancol = 1,
        othvarcol = c(428:429, 431:432),
        #select_subj = c("subj1","subj10"),
        select_chan = c("CZ")
)
# Subject-only selection: subj1 and subj10 on every channel.
S1S10 <- data_select(
        data = ERPdata,
        frames = 1:426,
        datacol = 2:427,
        subjcol = 430,
        chancol = 1,
        othvarcol = c(428:429, 431:432),
        select_subj = c("subj1", "subj10")
)
# Compare the size of the three selections.
dim(S1S10Fp1Fp2)
dim(CZ)
dim(S1S10)
## [1]   8 432
## [1]  40 432
## [1] 136 432
3. Data_summarize :
We can aggregate the data by columns such as condition, subject, or channel.
# data_summarize: aggregate the signal columns by one or more grouping columns.
#
# Arguments:
#   data, frames, datacol, subjcol, chancol, othvarcol, select_subj,
#   select_chan
#               passed to data_select() to optionally filter rows first.
#   datacol     also the columns handed to aggregate() as the values.
#   summarycol  column indices of the grouping variables.
#   fun         summary function applied per group (default: mean).
#   ...         further arguments passed on to aggregate() / `fun`.
#
# Returns the aggregate() result: the grouping columns first, carrying the
# names of the `summarycol` columns, then the aggregated columns. Row
# names are 1..n.
#
# Warnings are silenced while the function runs, as before, but the
# caller's `warn` option is now restored on exit instead of staying
# disabled for the rest of the session.
data_summarize <- function(data,frames,datacol,subjcol=NULL,chancol=NULL,othvarcol=NULL,
                          summarycol,
                          fun=mean,
                          select_subj=NULL,
                          select_chan=NULL,...){
        old_opts <- options(warn = -1)
        on.exit(options(old_opts), add = TRUE)
        # selection
        dta <- data_select(data,frames,datacol,subjcol,chancol,othvarcol,
                    select_subj,
                    select_chan)
        # one grouping vector per requested summary column
        n_by <- length(summarycol)
        by_vars <- lapply(summarycol, function(col) dta[, col])
        aggdata <- aggregate(dta[,datacol], by = by_vars,
                             fun, ...)
        aggdata <- aggdata[, 1:(n_by + length(datacol))]
        # aggregate() names the grouping columns Group.1, Group.2, ...;
        # restore the original column names.
        colnames(aggdata)[seq_len(n_by)] <- colnames(dta)[summarycol]
        rownames(aggdata) <- 1:dim(aggdata)[1]
        return(aggdata)
}
# Median signal per channel (column 1) and condition (column 429),
# restricted to two channels and two subjects.
A <- data_summarize(
        data = ERPdata,
        frames = 1:426,
        # you could put all elements you want to aggregate in datacol argument
        datacol = 2:427,
        subjcol = 430,
        chancol = 1,
        othvarcol = c(428:429, 431:432),
        summarycol = c(1, 429),
        fun = median,
        # could also select data
        select_chan = c("CZ", "Fp1"),
        select_subj = c("subj1", "subj10")
)
# Show the last rows of the first six columns, then the overall size.
tail(A[, 1:6])
dim(A)
  Channel Condition    value.1    value.2    value.3    value.4
1     Fp1   nonword -1.4049110 -1.9667935 -1.7905150 -1.7634650
2      CZ   nonword -1.8309385 -2.4948925 -2.1159490 -1.9471855
3     Fp1      word  0.4695684  0.1779768 -0.5842498 -0.9740931
4      CZ      word  0.8391661  0.8317206  0.2696740  0.1090901
[1]   4 428
4. Down Sample:
We can downsample the signal.
# downsample: replace the signal columns by row-wise means over windows of
# columns.
#
# Arguments:
#   data            data frame holding the signal and other variables.
#   datacol         numeric column indices of the signal (consecutive frames).
#   binwidth        number of columns per window (default 10); must lie
#                   between 1 and the number of signal columns.
#   movinginterval  NULL for non-overlapping bins, otherwise a number
#                   >= 2 and < binwidth that controls overlapping windows.
#
# With movinginterval = NULL the signal is cut into consecutive bins of
# `binwidth` columns; a shorter trailing bin is averaged as well.
#
# With a movinginterval, windows of `binwidth` columns start at columns
# 1, 1 + step, 1 + 2 * step, ... as long as the full window fits, where
# step = movinginterval - 1. If the number of signal columns is not a
# multiple of binwidth, one more window running from the next start
# position to the last column is appended.
# NOTE(review): the advance of movinginterval - 1 (rather than
# movinginterval) is kept exactly as in the previous implementation so
# existing results do not change -- confirm that this is intended.
#
# Returns the non-signal columns followed by the averaged windows, named
# value.1, value.2, ...
downsample <- function(data,
                       datacol,
                       binwidth=10,
                       movinginterval=NULL) {
        # drop = FALSE keeps the data frame shape even for a single column
        signal <- as.matrix(data[, datacol, drop = FALSE])
        other <- data[, -datacol, drop = FALSE]
        n_frames <- ncol(signal)
        if (binwidth < 1 || binwidth > n_frames) {
                stop("binwidth should be between 1 and the number of signal columns")
        }
        if (is.null(movinginterval)) {
                starts <- seq(1, n_frames, by = binwidth)
                windows <- lapply(starts, function(first) {
                        first:min(first + binwidth - 1, n_frames)
                })
        } else {
                if (movinginterval >= binwidth) {
                        stop("movinginterval should be smaller than binwidth!")
                }
                step <- movinginterval - 1
                if (step < 1) {
                        # a step of 0 would never advance the window
                        stop("movinginterval should be at least 2")
                }
                # offsets of all windows that fit completely
                offsets <- seq(0, n_frames - binwidth, by = step)
                windows <- lapply(offsets, function(offset) {
                        (1 + offset):(binwidth + offset)
                })
                if (n_frames %% binwidth >= 1) {
                        # trailing partial window from the next start position
                        next_offset <- offsets[length(offsets)] + step
                        windows <- c(windows, list((1 + next_offset):n_frames))
                }
        }
        window_means <- lapply(windows, function(cols) {
                apply(signal[, cols, drop = FALSE], 1, mean)
        })
        dta_downsample <- as.data.frame(do.call(cbind, window_means))
        colnames(dta_downsample) <- paste("value", seq_along(windows), sep = ".")
        cbind(other, dta_downsample)
}
# Non-overlapping bins of 10 frames.
down1 <- downsample(
        data = ERPdata,
        datacol = 2:427,
        binwidth = 10,
        movinginterval = NULL
)
# Overlapping windows; movinginterval must be smaller than binwidth.
down2 <- downsample(
        data = ERPdata,
        datacol = 2:427,
        binwidth = 10,
        movinginterval = 9
)
# Compare the size of both results.
dim(down1)
dim(down2)
[1] 1360   49
[1] 1360   60

EDA

##### 1.edaplot

1.edaplot
The function "edaplot" uses the ggplot2 grammar, so it offers several flexible options.
# group comparison please put in data after aggregate_raw
# edaplot: draw one line per row of the (optionally filtered) data.
#
# Arguments:
#   data         data frame with one signal per row.
#   frames       x-axis values, one per signal column; NULL (the default)
#                now falls back to 1..length(datacol).
#   datacol      column indices of the signal.
#   subjcol, chancol, othvarcol
#                column indices of the subject, channel and other
#                variables; together they form the id variables of the
#                long-format data, so they stay available for facetting
#                and extra aesthetics.
#   outlinesub   value(s) to highlight, matched against the first column
#                of the long-format data -- presumably the subject column
#                when subjcol is supplied; verify against melt(). NULL
#                for no highlight. Several values are supported.
#   outcolor     colour of the highlighted lines.
#   select_subj, select_chan
#                optional row filters, see data_select().
#   ...          forwarded to data_select() and, when outlinesub is
#                given, also to aes().
#
# Returns a ggplot object, so further layers can be added with `+`.
edaplot <- function(data,frames=NULL,datacol,subjcol=NULL,chancol=NULL,othvarcol=NULL,
                             outlinesub=NULL,outcolor="red",
                             select_subj=c(NULL),
                             select_chan=c(NULL),...){
        if (is.null(frames)) {
                frames <- seq_along(datacol)
        }
        # selection
        dta <- data_select(data,frames,datacol,subjcol,chancol,othvarcol,
                        select_subj,
                        select_chan,...)
        # one group (= one line) per row of the selected data
        dta$groupvar <- rownames(dta)
        id_names <- c(variable.names(dta)[c(subjcol,chancol,othvarcol)],
                      "groupvar")
        datalong <- melt(dta, id = id_names)
        # order() is stable, so within each group the signal stays in
        # column order and the frame labels can simply be repeated.
        datalong <- datalong[order(datalong$groupvar), ]
        datalong$frames <- rep(frames, length(datalong[, 1]) / length(frames))
        if (is.null(outlinesub)) {
                eda_plot <- ggplot(datalong,
                                   aes(x=frames,y=value,group=groupvar))+
                        geom_line()
        } else {
                # %in% supports highlighting several values at once
                highlighted <- datalong[datalong[, 1] %in% outlinesub, ,
                                        drop = FALSE]
                # the highlight layer is drawn on top of the base lines
                eda_plot <- ggplot(datalong,
                                   aes(x=frames,y=value,group=groupvar,...))+
                        geom_line()+
                        geom_line(data=highlighted,aes(x=frames,y=value),
                                  col=outcolor)
        }
        return(eda_plot)
}
# Although the full data (all trials) works fine, it is recommended that the
# data passed to edaplot is a single subject's data or aggregated data.
edaplot(ERPdata,
        frames = 1:426,
        datacol=2:427,
        subjcol=430,
        chancol=1,
        othvarcol=c(428:429,431:432),
        
        # choose the subject you want to show(optional)
        select_subj = c("subj9","subj1","subj5","subj8"), 
        # choose the channel you want to show(optional)
        select_chan = c("F3","F4"))+
        
        facet_grid(Channel~Condition)

edaplot(ERPdata,
        frames = 1:426,
        datacol=2:427,
        subjcol=430,
        chancol=1,
        othvarcol=c(428:429,431:432),
        
        # choose the subject you want to show(optional)
        select_subj = c("subj9","subj1","subj5","subj8"), 
        # choose the channel you want to show(optional)
        select_chan = c("F3","F4"),
        
        # highlight a single subject with the color you want
        outlinesub="subj9",outcolor = "blue")+
        
        facet_grid(Channel~Condition)

edaplot(ERPdata,
        frames=1:426,
        datacol = 2:427,subjcol=430,chancol=1,othvarcol=c(428:429,431:432),
        select_chan = c("F3","F4","CZ"),
        select_subj = c("subj9","subj1","subj5","subj8"))+
        
        # One subject one color
        geom_line(aes(col=Subject))+  # will cover outline sub
        
        facet_grid(Condition~Channel)

edaplot(ERPdata,
        frames=1:426,
        datacol = 2:427,subjcol=430,chancol=1,othvarcol=c(428:429,431:432),
        select_chan = c("F3","F4","CZ"))+
        
        # Color depends on IQ (size and alpha can be mapped as well)
        geom_line(aes(col=IQ,alpha=Channel,size=Condition))+ 
        facet_grid(Condition~Channel)

edaplot(ERPdata,
        frames=1:426,
        datacol = 2:427,subjcol=430,chancol=1,othvarcol=c(428:429,431:432),
        select_chan = c("F3","F4","CZ"))+
        # One Condition one color
        geom_line(aes(col=Condition))+
        facet_grid(.~Channel)

edaplot(ERPdata,
        frames=1:426,
        datacol = 2:427,subjcol=430,chancol=1,othvarcol=c(428:429,431:432),
        select_chan = c("F3","F4","CZ"))+
        # One Condition one color, with a custom legend
        geom_line(aes(col=Condition))+
        facet_grid(.~Channel)+
        scale_colour_manual(values=c("red","blue"),name="Cond",labels=c("A","B"))

edaplot(ERPdata,
        frames=1:426,
        datacol = 2:427,subjcol=430,chancol=1,othvarcol=c(428:429,431:432),
        select_chan = c("F3","F4","CZ"))+
        geom_line(aes(col=Condition))+
        facet_grid(Condition~Channel)

edaplot(ERPdata,
        frames=1:426,
        datacol = 2:427,subjcol=430,chancol=1,othvarcol=c(428:429,431:432),
        select_chan = c("F3","F4","CZ"))+
        geom_line(aes(col=Condition))+
        facet_grid(Condition~Channel)+
        # put on the summary line NOTE: need a group=NULL argument 
        # `fun` replaces the deprecated `fun.y` argument of stat_summary()
        stat_summary(aes(group=NULL),fun = "mean", 
                     colour = "purple", size = 0.5, geom = "line")

edaplot(ERPdata,
        frames=1:426,
        datacol = 2:427,subjcol=430,chancol=1,othvarcol=c(428:429,431:432),
        select_chan = c("F3","F4","CZ"))+
        geom_line(aes(col=Condition))+
        facet_grid(Condition~Channel)+
        theme_bw()+
        theme(legend.position="top")+
        xlim(-100,500)+
        ylim(-50,50)+
        # labels are passed as named arguments rather than wrapped in a list
        labs(title = "Flexible",x="time",y="signal")+
        geom_vline(xintercept = c(0,213,426),col="yellow")