ERP package

Outline

data

1. ERPdata : possibly the formal example dataset for the package

preprocessing

1. list2df

2. data_select

3. data_summarize

EDA

1.edaplot

2.ciplot

Data

1. ERPsets : a list dataset from erpR package: 20 subjects , 2 conditions (40 lists).

2. ERPdata : a dataset derived from ERPsets

# Attach the packages used by this walkthrough.
# NOTE(review): require() only returns FALSE (with a warning) when a package is
# missing, whereas library() stops with an error; the mix is kept as written.
library(ERP);require(mnormt);require(fdrtool);library(ggplot2);library(dplyr)
library(erpR);require(akima);library(reshape2);library(boot)
# Make the ERPsets example list available in the workspace.
data(ERPsets)
# Restore the objects saved in ERPdata.RData (looked up in the working directory).
load("ERPdata.RData")

preprocessing

1. list2df

2. data_select

3. data_summarize

1. list_to_df

The function `list_to_data` transforms a list of data frames into a single data frame in the format used by our package.

# list_to_data: stack a named list of data frames into one wide data frame.
#
# Arguments:
#   list_data  a non-empty named list of data frames. Every element must have
#              as many rows as `frames` has entries; each column of an element
#              becomes one row of the result.
#   frames     vector identifying the rows of each element; its values are
#              used as the suffix of the generated `value.<frame>` columns.
#
# Returns a data frame with the columns `list.name` (name of the list
# element), `Channel` (original column name) and one `value.<frame>` column
# per entry of `frames`. Row names are renumbered from 1.
list_to_data <- function(list_data,frames){
        if (length(list_data) == 0 || is.null(names(list_data))) {
                stop("list_data must be a non-empty named list", call. = FALSE)
        }
        # Tag every element with its frame index and its own list name so that
        # both survive the reshaping below.
        for (i in seq_along(list_data)){
                list_data[[i]]$frames <- frames
                list_data[[i]]$list.name <- names(list_data)[i]
        }
        # Long format first (one row per frame x column) ...
        long_list <- lapply(list_data, melt, id.vars = c("frames", "list.name"))
        # ... then spread the frames back out into columns.
        wide_list <- lapply(long_list, reshape,
                            timevar = "frames", idvar = c("variable", "list.name"),
                            direction = "wide")
        # Bind every element of the *argument* (not a global object) in one call.
        # unname() keeps element names from being read as rbind() arguments.
        Ana_data <- do.call(rbind, unname(wide_list))
        colnames(Ana_data)[2] <- "Channel"
        rownames(Ana_data) <- seq_len(nrow(Ana_data))
        Ana_data
}

# Stack the ERPsets list into a single wide data frame.
ERP_df <- list_to_data(
        list_data = ERPsets, # the list to convert
        frames = 1:426       # frame index, one entry per row of each list element
)
# AddExpCondSub is defined outside this section; presumably it derives the
# Experiment / Condition columns shown in the output below from the list name.
ERP_df <- AddExpCondSub(
        data = ERP_df,       # the transformed data
        list.name_col = 1    # position of the list.name column
)
# ERP_df <- ERP_df[, -1]  # optionally drop the list name column
# Show the first three rows: leading columns plus the trailing ones.
head(ERP_df[, c(1:3, 427:430)], n = 3)

        list.name Channel     value.1 value.425 value.426 Experiment
1 Exp1_word_subj1     Fp1  0.36995740 14.160980  9.971283       Exp1
2 Exp1_word_subj1     Fp2 -0.02759907 13.750350  9.678596       Exp1
3 Exp1_word_subj1      F3  0.42971300  6.269233  4.413941       Exp1
  Condition
1      word
2      word
3      word

2. data_select

It selects rows of the data by subject, by channel, or by both.

# data_select: keep only the rows of `data` that belong to the requested
# subjects and/or channels.
#
# Arguments:
#   data         data frame to filter.
#   frames, datacol, othvarcol
#                accepted for a signature shared with the other functions of
#                this file; not used by the selection itself.
#   subjcol      column (index or name) holding the subject identifier;
#                required when `select_subj` is given.
#   chancol      column (index or name) holding the channel identifier;
#                required when `select_chan` is given.
#   select_subj  subjects to keep, or NULL for no subject filter.
#   select_chan  channels to keep, or NULL for no channel filter.
#   ...          ignored.
#
# Returns the filtered data frame. Rows are grouped in the order of the
# requested values (subjects are filtered first, then channels), a value
# requested twice yields its rows twice, and row names are renumbered from 1.
# A selection that matches nothing returns a zero-row data frame. When neither
# filter is given, `data` is returned untouched.
data_select <- function(data,frames,datacol,subjcol=NULL,chancol=NULL,othvarcol=NULL,
                        select_subj=NULL,
                        select_chan=NULL,...){
        # Rows of `dta` whose `col` equals each key in turn, in key order.
        pick_rows <- function(dta, col, keys, col_arg, key_arg){
                if (is.null(col)) {
                        stop(col_arg, " must be supplied when ", key_arg, " is used",
                             call. = FALSE)
                }
                # which() drops NA comparisons, like subset() does.
                idx <- as.integer(unlist(lapply(keys, function(key) {
                        which(dta[, col] == key)
                })))
                picked <- dta[idx, , drop = FALSE]
                # seq_len() keeps this valid for an empty selection.
                rownames(picked) <- seq_len(nrow(picked))
                picked
        }
        dta <- data
        if (!is.null(select_subj)) {
                dta <- pick_rows(dta, subjcol, select_subj, "subjcol", "select_subj")
        }
        if (!is.null(select_chan)) {
                dta <- pick_rows(dta, chancol, select_chan, "chancol", "select_chan")
        }
        dta
}

# Joint selection: two subjects and two channels.
# NOTE(review): the object name mentions S10 / Fp2, but the call below selects
# subj1, subj2 and the channels CZ, Fp1.
S1S10Fp1Fp2 <- data_select(
        data = ERPdata,
        frames = 1:426,
        datacol = 2:427,
        chancol = 1,
        subjcol = 430,
        othvarcol = c(428:429, 431:432),
        select_subj = c("subj1", "subj2"),
        select_chan = c("CZ", "Fp1")
)

# Channel-only selection (no subject filter).
# A subject filter such as select_subj = c("subj1","subj10") could be added here.
CZ <- data_select(
        data = ERPdata,
        frames = 1:426,
        datacol = 2:427,
        chancol = 1,
        subjcol = 430,
        othvarcol = c(428:429, 431:432),
        select_chan = c("CZ")
)

# Subject-only selection (no channel filter).
S1S10 <- data_select(
        data = ERPdata,
        frames = 1:426,
        datacol = 2:427,
        chancol = 1,
        subjcol = 430,
        othvarcol = c(428:429, 431:432),
        select_subj = c("subj1", "subj10")
)

# Dimensions of the three selections.
dim(S1S10Fp1Fp2)
dim(CZ)
dim(S1S10)

## [1]   8 432

## [1]  40 432

## [1] 136 432

3. Data_summarize :

We can aggregate the data by one or more columns, such as condition, subject or channel.

# data_summarize: aggregate the data columns within groups defined by one or
# more columns (e.g. channel, condition, subject).
#
# Arguments:
#   data, frames, datacol, subjcol, chancol, othvarcol, select_subj, select_chan
#                passed to data_select(), which filters the rows first.
#   datacol      also the columns that are aggregated.
#   summarycol   columns (indices or names) defining the groups; at least one.
#   fun          summary function applied to every data column in every group.
#   ...          further arguments handed to `fun` through aggregate().
#
# Returns a data frame with the grouping columns first (named as in `data`)
# followed by one aggregated column per entry of `datacol`; row names are
# renumbered from 1.
data_summarize <- function(data,frames,datacol,subjcol=NULL,chancol=NULL,othvarcol=NULL,
                          summarycol,
                          fun=mean,
                          select_subj=NULL,
                          select_chan=NULL,...){
        if (length(summarycol) < 1) {
                stop("summarycol must contain at least one grouping column",
                     call. = FALSE)
        }
        # Warnings stay silenced while aggregating, as before, but the caller's
        # setting is restored on exit instead of being switched off for the
        # whole session.
        old_opts <- options(warn = -1)
        on.exit(options(old_opts), add = TRUE)
        # selection
        dta <- data_select(data,frames,datacol,subjcol,chancol,othvarcol,
                    select_subj,
                    select_chan)
        # process
        # Looking the names up through the data frame works for indices and names.
        group_names <- colnames(dta[, summarycol, drop = FALSE])
        group_list <- lapply(summarycol, function(col) dta[, col])
        # drop = FALSE keeps the column name when only one data column is given.
        aggdata <- aggregate(dta[, datacol, drop = FALSE], by = group_list,
                             fun, ...)
        colnames(aggdata)[seq_along(group_names)] <- group_names
        rownames(aggdata) <- seq_len(nrow(aggdata))
        aggdata
}

# Median signal per channel and condition, restricted to two channels and two
# subjects.
A <- data_summarize(
        data = ERPdata,
        frames = 1:426,
        # every column to be aggregated goes into datacol
        datacol = 2:427,
        chancol = 1,
        subjcol = 430,
        othvarcol = c(428:429, 431:432),
        summarycol = c(1, 429),
        fun = median,
        # optional row selection
        select_subj = c("subj1", "subj10"),
        select_chan = c("CZ", "Fp1")
)
tail(A[, 1:6])
dim(A)

  Channel Condition    value.1    value.2    value.3    value.4
1     Fp1   nonword -1.4049110 -1.9667935 -1.7905150 -1.7634650
2      CZ   nonword -1.8309385 -2.4948925 -2.1159490 -1.9471855
3     Fp1      word  0.4695684  0.1779768 -0.5842498 -0.9740931
4      CZ      word  0.8391661  0.8317206  0.2696740  0.1090901

[1]   4 428

EDA

1.edaplot

2.ciplot

1.edaplot

The function “edaplot” uses the ggplot2 grammar, so it offers several flexible options.

# edaplot: draw one line per row of the (optionally filtered) data, with the
# frame on the x axis and the signal on the y axis.
# For group comparisons, pass in data that has already been aggregated.
#
# Arguments:
#   data         wide data frame, one signal column per frame.
#   frames       x-axis values, one per entry of `datacol`; NULL (default)
#                uses 1, 2, ..., length(datacol).
#   datacol      column indices of the signal columns, in frame order.
#   subjcol, chancol, othvarcol
#                column indices kept as identifier columns in the plot data
#                (so they can be used for facets or aesthetics later).
#   outlinesub   subject(s) to highlight, or NULL; needs `subjcol`.
#   outcolor     colour of the highlighted lines.
#   select_subj, select_chan
#                optional row filters, see data_select().
#   ...          extra aesthetics added to the base aes() mapping.
#
# Returns a ggplot object that further layers can be added to.
edaplot <- function(data,frames=NULL,datacol,subjcol=NULL,chancol=NULL,othvarcol=NULL,
                             outlinesub=NULL,outcolor="red",
                             select_subj=c(NULL),
                             select_chan=c(NULL),...){
        # A NULL `frames` used to fail inside rep(); fall back to a plain index.
        if (is.null(frames)) {
                frames <- seq_along(datacol)
        }
        if (length(frames) != length(datacol)) {
                stop("frames must have one entry per column in datacol",
                     call. = FALSE)
        }
        if (!is.null(outlinesub) && is.null(subjcol)) {
                stop("subjcol must be supplied when outlinesub is used",
                     call. = FALSE)
        }
        # selection
        dta <- data_select(data,frames,datacol,subjcol,chancol,othvarcol,
                        select_subj,
                        select_chan)
        # plot
        col_names <- colnames(dta)
        subvar <- col_names[subjcol]
        id_names <- col_names[c(subjcol, chancol, othvarcol)]
        # One group per original row, so each row becomes its own line.
        dta$groupvar <- rownames(dta)
        # Melt only the signal columns; columns not listed anywhere are dropped
        # instead of being mistaken for signal.
        datalong <- melt(dta,
                         id.vars = c(id_names, "groupvar"),
                         measure.vars = col_names[datacol])
        # Stable sort: within a group the rows stay in frame (column) order,
        # which is what the rep(frames, ...) below relies on.
        datalong <- datalong[order(datalong$groupvar), ]
        datalong$frames <- rep(frames, nrow(datalong) / length(frames))
        erp_plot <- ggplot(datalong,
                           aes(x = frames, y = value, group = groupvar, ...)) +
                geom_line()
        if (!is.null(outlinesub)) {
                # Match on the subject column by name; %in% allows several
                # subjects to be highlighted at once.
                highlighted <- datalong[datalong[[subvar]] %in% outlinesub, ,
                                        drop = FALSE]
                erp_plot <- erp_plot +
                        geom_line(data = highlighted,
                                  aes(x = frames, y = value), col = outcolor)
        }
        erp_plot
}

# Although the full data (all trials) work fine, the `data` argument is best
# given as a single subject's data or as aggregated data.
edaplot(ERPdata,
        frames = 1:426,
        datacol = 2:427,
        subjcol = 430,
        chancol = 1,
        othvarcol = c(428:429, 431:432),
        outlinesub = "subj9", outcolor = "blue", # highlight a single subject with the color you want
        # choose the subjects you want to show (optional); each subject is
        # listed once so that none is counted twice in the mean line below
        select_subj = c("subj9", "subj1", "subj5", "subj8"),
        # choose the channels you want to show (optional)
        select_chan = c("F3", "F4")) +
        facet_grid(Channel ~ Condition) + # other flexible setting
        theme_bw() +
        # put on the summary line; NOTE: needs the group = NULL argument
        # NOTE(review): fun.y is deprecated since ggplot2 3.3.0 in favour of
        # fun; kept so the example still runs on older ggplot2 versions.
        stat_summary(aes(group = NULL), fun.y = "mean",
                     colour = "red", size = 0.5, geom = "line")

        # put on the summary line NOTE: need a group=NULL argument 


# Three channels, lines coloured by the IQ column, one panel per
# condition x channel.
edaplot(ERPdata,
        frames = 1:426,
        datacol = 2:427, subjcol = 430, chancol = 1, othvarcol = c(428:429, 431:432),
        # "CZ" is the spelling used for this channel in the other examples
        select_chan = c("F3", "F4", "CZ")) +
        geom_line(aes(col = IQ)) +
        facet_grid(Condition ~ Channel) +
        theme_bw() +
        theme(legend.position = "none") +
        xlim(-100, 500) +
        ylim(-15, 15) +
        # named arguments instead of the legacy labs(list(...)) form
        labs(title = "Flexible", x = "time", y = "signal")

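2.ciplot

ciplot uses a bootstrapping method to draw a confidence interval around the summary curve, either for each level of a compared variable (two or more levels) or, when no comparison variable is given, for a single curve.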

# ciplot: plot a summary curve per channel together with a bootstrap interval
# band, optionally split by one comparison variable.
#
# Arguments:
#   data, frames, datacol, subjcol, othvarcol, select_subj, select_chan
#                as in data_select() / data_summarize().
#   chancol      column index of the channel variable (required; it defines
#                the facets).
#   cpvarcol     column index of the single variable to compare, or NULL for
#                one curve and band per channel.
#   singlecol    colour of the curve and band when `cpvarcol` is NULL.
#   fun          statistic in the form expected by boot::boot(): a function
#                of the data `x` and an index vector `d`. It is also called
#                with `x` only to compute the plotted curve.
#   bootnum      number of bootstrap replicates.
#   bootintval   lower and upper probabilities of the interval.
#   alpha        transparency of the band.
#   ...          further arguments for boot() (for example `sim`).
#
# Returns a ggplot object faceted by channel. The lower and upper bounds are
# computed in two separate bootstrap runs.
ciplot <- function(data,frames,datacol,subjcol=NULL,chancol=NULL,othvarcol=NULL,
                   cpvarcol=NULL, singlecol="black",
                   fun=function(x, d){return(mean(x[d]))},
                   bootnum=500,
                   bootintval=c(.025,.975),
                   alpha=0.3,
                   select_subj = NULL,
                   select_chan = NULL,...){
        if (is.null(chancol)) {
                stop("chancol must be supplied", call. = FALSE)
        }
        # One quantile of the bootstrap distribution of `bootfun` over `x`.
        bootstrap <- function(x,bootnum,bootfun,bootintval=c(0.025,0.975),quantilenum,...){
                boot_result <- boot(x, statistic = bootfun, R = bootnum, ...)
                quantile(boot_result$t, bootintval, na.rm = TRUE)[quantilenum]
        }
        # data selection
        dta <- data_select(data,frames,datacol,subjcol,chancol,othvarcol,
                        select_subj,
                        select_chan)
        compare <- !is.null(cpvarcol)
        if (compare) {
                dta[,cpvarcol] <- as.factor(dta[,cpvarcol])
                dta[,chancol] <- as.factor(dta[,chancol])
        }
        group_col <- c(chancol, cpvarcol)
        id_names <- colnames(dta)[group_col]
        n_id <- length(id_names)
        # After melting: id columns, then `variable`, then `value`.
        value_col <- n_id + 2
        # Aggregate with `stat`, melt to long format and sort by the id columns
        # and the frame variable so that the three tables line up row by row.
        summarise_long <- function(stat, ...) {
                wide <- data_summarize(dta,frames,datacol,subjcol,chancol,othvarcol,
                                       summarycol = group_col, fun = stat, ...)
                long <- melt(wide, id.vars = id_names)
                # unname() keeps column names from being read as order() arguments
                sort_keys <- unname(as.list(long[seq_len(n_id + 1)]))
                long[do.call(order, sort_keys), ]
        }
        plot_data <- summarise_long(fun)
        # `...` is forwarded here so that it actually reaches boot().
        lower <- summarise_long(bootstrap, bootnum = bootnum, bootfun = fun,
                                bootintval = bootintval, quantilenum = 1, ...)
        upper <- summarise_long(bootstrap, bootnum = bootnum, bootfun = fun,
                                bootintval = bootintval, quantilenum = 2, ...)
        colnames(plot_data)[value_col] <- "FUN"
        plot_data$Q1 <- lower[, value_col]
        plot_data$Q2 <- upper[, value_col]
        plot_data$frames <- rep(frames, nrow(plot_data) / length(frames))
        if (compare) {
                colnames(plot_data)[1:2] <- c("Channel","Condition")
                ci_plot <- ggplot(plot_data, aes(x = frames, group = Condition)) +
                        geom_ribbon(aes(x = frames, ymax = Q2, ymin = Q1, fill = Condition),
                                    alpha = alpha) +
                        geom_line(aes(y = FUN, col = Condition)) +
                        labs(y = "Signal") +
                        facet_wrap(~Channel)
        } else {
                colnames(plot_data)[1] <- "Channel"
                # The colour is a fixed setting, so it goes outside aes();
                # inside aes() it was treated as data and `singlecol` was
                # never actually used.
                ci_plot <- ggplot(plot_data, aes(x = frames)) +
                        geom_ribbon(aes(x = frames, ymax = Q2, ymin = Q1),
                                    fill = singlecol, alpha = alpha) +
                        geom_line(aes(y = FUN), colour = singlecol) +
                        labs(y = "Signal") +
                        facet_wrap(~Channel) +
                        theme(legend.position = "none")
        }
        ci_plot
}

# Statistic used for both the plotted line and the bootstrap interval:
# the mean of the resampled values.
samplemean <- function(x, d){return(mean(x[d]))}
ciplot(ERPdata, # input the data
       frames = 1:426,
       datacol = 2:427,
       subjcol = 430,
       chancol = 1,
       othvarcol = c(428:429, 431:432),
       # Important: the column of the single variable you want to compare; it
       # may be NULL, in which case one line and one interval are drawn.
       cpvarcol = 429,
       #singlecol = "red", # works only when cpvarcol is NULL
       fun = samplemean,
       bootnum = 500, # number of bootstrap replicates
       bootintval = c(.025, .975), # interval
       alpha = 0.5, # the value of alpha on the plot
       select_chan = c("Fp1", "Fp2")) +  # select data
       #select_subj = c("subj1","subj2","subj3","subj10"))+   # select data
       #sim = "parametric")+  # other setting in boot function
        ylim(-10, 10) +
        # Colours and labels are tied to the condition values by name. The
        # positional form paired "W" with whichever level came first, and the
        # aggregated output above lists nonword before word.
        scale_fill_manual(values = c(word = "red", nonword = "blue"),
                          breaks = c("word", "nonword"),
                          name = "Cond", labels = c("W", "NW")) +
        scale_colour_manual(values = c(word = "red", nonword = "blue"),
                            breaks = c("word", "nonword"),
                            name = "Cond", labels = c("W", "NW"))