1. Read data

library(foreign)
## Warning: package 'foreign' was built under R version 3.2.5
# Load the raw measurements from the CSV export; the preview below shows the
# columns group, stt, a1, a2 and latency.
data <- read.csv(
  file = "C:/Users/BINH THANG/Dropbox/R - Learning/thong/bieudo/data_1.csv"
)

# Show the first rows to confirm the import.
head(data)
##           group stt    a1    a2 latency
## 1 Control group   1 12.04 12.73    7.78
## 2 Control group   2 12.58 13.21    7.47
## 3 Control group   3 11.32 11.87    6.69
## 4 Control group   4 13.22 12.94    5.15
## 5 Control group   5 13.58 13.87    3.43
## 6 Control group   6 12.83 11.86    5.29

2. Reshape data (gather the measurements from the different time points into a single column)

# Define the groups


# Reshape the data as follows

library(reshape2)
## Warning: package 'reshape2' was built under R version 3.2.5
# Stack the three measurement columns into long format: the column name goes
# into `time` and the measurement itself into melt's default `value` column.
a <- melt(
  data,
  id.vars = c("group", "stt"),
  measure.vars = c("a1", "a2", "latency"),
  variable.name = "time"
)

# Make sure both grouping columns are factors.
a[["group"]] <- as.factor(a[["group"]])
a[["time"]] <- as.factor(a[["time"]])

# Inspect the reshaped data set.
head(a)
##           group stt time value
## 1 Control group   1   a1 12.04
## 2 Control group   2   a1 12.58
## 3 Control group   3   a1 11.32
## 4 Control group   4   a1 13.22
## 5 Control group   5   a1 13.58
## 6 Control group   6   a1 12.83

3. Helper functions: the code below is used unchanged (lines 40-175 of the original script, no changes)

## Summarizes one measured column per group: count, mean, standard deviation,
## standard error of the mean, and confidence-interval half-width.
##   data: a data frame.
##   measurevar: the name of the column that contains the variable to be summarized
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the coverage of the confidence interval (default is .95, i.e. 95%)
##   .drop: passed straight through to plyr::ddply
## Returns a data frame holding the grouping columns followed by N, a column
## named after measurevar (the group mean), sd, se and ci.
## Stops with an error if data is not a data frame or a named column is absent.
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
    # Kept so that plyr is still attached after the first call, exactly as before.
    library(plyr)

    # Fail early: with `[[` a missing measure column would otherwise be
    # summarized silently as N = 0 / NA instead of raising an error.
    if (!is.data.frame(data)) {
        stop("'data' must be a data frame", call. = FALSE)
    }
    wanted <- c(measurevar, if (is.character(groupvars)) groupvars)
    absent <- setdiff(wanted, names(data))
    if (length(absent) > 0) {
        stop("column(s) not found in 'data': ",
             paste(absent, collapse = ", "), call. = FALSE)
    }

    # One row per group with N, mean and sd of the measured column.
    # With na.rm = TRUE the count only includes the non-missing values.
    # The plyr:: prefix keeps the call bound to plyr even when another package
    # or a user object of the same name comes first on the search path.
    datac <- plyr::ddply(data, groupvars, .drop = .drop,
      .fun = function(piece) {
        x <- piece[[measurevar]]
        c(N    = if (na.rm) sum(!is.na(x)) else length(x),
          mean = mean(x, na.rm = na.rm),
          sd   = sd(x, na.rm = na.rm)
        )
      }
    )

    # Rename the "mean" column after the measured variable. This must be plyr's
    # rename(): dplyr::rename() has a different calling convention.
    datac <- plyr::rename(datac, c("mean" = measurevar))

    # Standard error of the mean
    datac$se <- datac$sd / sqrt(datac$N)

    # Two-sided t multiplier with df = N - 1,
    # e.g. conf.interval = .95 uses the .975 quantile.
    ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
    datac$ci <- datac$se * ciMult

    datac
}
## Norms the data within subjects: every subject's mean (computed per idvar and
## betweenvars combination) is subtracted from that subject's values, and the
## mean of measurevar over the whole data set is added back.
##   data: a data frame.
##   idvar: the name of a column that identifies each subject (or matched subjects)
##   measurevar: the name of a column that contains the variable to be summarized
##   betweenvars: a vector containing names of columns that are between-subjects variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   .drop: passed straight through to plyr::ddply
## Returns the data merged with the subject means (merge() may reorder rows and
## columns), plus a new column named "<measurevar>_norm" with the normed values.
## Stops with an error if data is not a data frame or a named column is absent.
normDataWithin <- function(data=NULL, idvar, measurevar, betweenvars=NULL,
                           na.rm=FALSE, .drop=TRUE) {
    # Kept so that plyr is still attached after the first call, exactly as before.
    library(plyr)

    # `[[` returns NULL for an unknown column instead of raising an error,
    # so the columns are checked explicitly.
    if (!is.data.frame(data)) {
        stop("'data' must be a data frame", call. = FALSE)
    }
    absent <- setdiff(c(idvar, measurevar, betweenvars), names(data))
    if (length(absent) > 0) {
        stop("column(s) not found in 'data': ",
             paste(absent, collapse = ", "), call. = FALSE)
    }

    # Mean of the measured column for each subject (within each between-subjects
    # cell). `[[` always yields a plain vector, also for tibble input.
    data.subjMean <- plyr::ddply(data, c(idvar, betweenvars), .drop = .drop,
      .fun = function(piece) {
        c(subjMean = mean(piece[[measurevar]], na.rm = na.rm))
      }
    )

    # Put the subject means next to the original rows (joined on the shared
    # id / between-subjects columns).
    data <- merge(data, data.subjMean)

    # Normed value = value - subject mean + overall mean
    measureNormedVar <- paste0(measurevar, "_norm")
    data[[measureNormedVar]] <- data[[measurevar]] - data[["subjMean"]] +
                                mean(data[[measurevar]], na.rm = na.rm)

    # The helper column is not part of the result
    data$subjMean <- NULL

    data
}
## Summarizes data, handling within-subjects variables by removing inter-subject variability.
## Gives count, un-normed mean, normed mean, standard deviation, standard error
## of the mean, and confidence interval.
## If there are within-subject variables, sd, se and ci are computed on the
## normed data and scaled by sqrt(M / (M - 1)), M being the product of the
## numbers of levels of the within-subject variables (method from Morey, 2008).
## If there are no within-subject conditions to correct for (no withinvars, or
## fewer than two levels in total), that factor is undefined (division by
## zero), so sd, se and ci are the ordinary summarySE values of the un-normed data.
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summarized
##   betweenvars: a vector containing names of columns that are between-subjects variables
##   withinvars: a vector containing names of columns that are within-subjects variables
##   idvar: the name of a column that identifies each subject (or matched subjects)
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
##   .drop: passed on to summarySE and normDataWithin
## Returns a data frame with the grouping columns, N, the un-normed mean (named
## after measurevar), the normed mean ("<measurevar>_norm"), sd, se and ci.
summarySEwithin <- function(data=NULL, measurevar, betweenvars=NULL, withinvars=NULL,
                            idvar=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) {

  groupvars <- c(betweenvars, withinvars)
  spreadCols <- c("sd", "se", "ci")

  # Ensure that the betweenvars and withinvars are factors
  factorvars <- vapply(data[, groupvars, drop=FALSE],
    FUN=is.factor, FUN.VALUE=logical(1))

  if (!all(factorvars)) {
    nonfactorvars <- names(factorvars)[!factorvars]
    message("Automatically converting the following non-factors to factors: ",
            paste(nonfactorvars, collapse = ", "))
    data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
  }

  # Summary of the un-normed data; kept in full for the no-correction case
  rawSummary <- summarySE(data, measurevar, groupvars=groupvars,
                          na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

  # Only N and the un-normed means are taken from it in the regular case
  datac <- rawSummary[, setdiff(names(rawSummary), spreadCols), drop=FALSE]

  # Norm each subject's data
  ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop=.drop)

  # This is the name of the new column
  measurevar_n <- paste0(measurevar, "_norm")

  # Collapse the normed data - now we can treat between and within vars the same
  ndatac <- summarySE(ndata, measurevar_n, groupvars=groupvars,
                      na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

  # Product of the number of conditions of the within-S variables
  # (an empty product, i.e. no withinvars, is 1)
  nWithinGroups <- prod(vapply(ndatac[, withinvars, drop=FALSE], FUN=nlevels,
                               FUN.VALUE=numeric(1)))

  if (nWithinGroups < 2) {
    # sqrt(n / (n - 1)) would be Inf here and turn sd/se/ci into Inf/NaN.
    # Report the spread of the un-normed data instead, keeping the usual
    # column layout: grouping columns, N, mean, normed mean, sd, se, ci.
    normedMeans <- ndatac[, setdiff(names(ndatac), spreadCols), drop=FALSE]
    combined <- merge(rawSummary, normedMeans)
    leading <- setdiff(names(combined), c(measurevar_n, spreadCols))
    return(combined[, c(leading, measurevar_n, spreadCols), drop=FALSE])
  }

  # Apply correction from Morey (2008) to the standard error and confidence interval
  correctionFactor <- sqrt(nWithinGroups / (nWithinGroups - 1))
  ndatac$sd <- ndatac$sd * correctionFactor
  ndatac$se <- ndatac$se * correctionFactor
  ndatac$ci <- ndatac$ci * correctionFactor

  # Combine the un-normed means with the normed results
  merge(datac, ndatac)
}

4. Describe the data at the different time points

# Descriptive statistics of `value` for every time x group combination.
b <- summarySE(
  data = a,
  measurevar = "value",
  groupvars = c("time", "group")
)
## Warning: package 'plyr' was built under R version 3.2.5

# Print the resulting summary table.
b
##      time                      group  N     value        sd        se
## 1      a1              Control group 12 12.675833 0.8605015 0.2484054
## 2      a1   Susceptible group (?100)  9  6.307778 1.2266089 0.4088696
## 3      a1 Unsusceptible group (?100)  3  6.890000 0.2882707 0.1664332
## 4      a2              Control group 12 12.752500 0.6471212 0.1868078
## 5      a2   Susceptible group (?100)  9  6.175556 1.2474886 0.4158295
## 6      a2 Unsusceptible group (?100)  3  6.743333 0.6313742 0.3645240
## 7 latency              Control group 12  6.210000 1.4851813 0.4287349
## 8 latency   Susceptible group (?100)  9  8.572222 5.0956888 1.6985629
## 9 latency Unsusceptible group (?100)  3 11.770000 5.4932049 3.1715033
##           ci
## 1  0.5467366
## 2  0.9428551
## 3  0.7161041
## 4  0.4111612
## 5  0.9589046
## 6  1.5684204
## 7  0.9436392
## 8  3.9168932
## 9 13.6458774

5. Draw the plot

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.5
# Dodge position with width 0.1; the boxplot call in this section does not use it.
pd <- position_dodge(width = 0.1)

# Boxplots of the measured value at each time point, filled by group.
ggplot(data = a, mapping = aes(x = time, y = value, fill = group)) +
  geom_boxplot()