1. Read data
2. reshape data (doi cac thoi diem khac nhau vao chung mot cot)
3. Code duoi khong thay doi (lines 40-175: no change)
4. Mo ta so lieu o cac thoi diem khac nhau
5. Ve bieu do

1. Read data

library(foreign)

## Warning: package 'foreign' was built under R version 3.2.5

# Load the measurements from CSV into `data` and preview the first rows.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file location exists.
data <- read.csv("C:/Users/BINH THANG/Dropbox/R - Learning/thong/bieudo/data_1.csv")
head(data)

##           group stt    a1    a2 latency
## 1 Control group   1 12.04 12.73    7.78
## 2 Control group   2 12.58 13.21    7.47
## 3 Control group   3 11.32 11.87    6.69
## 4 Control group   4 13.22 12.94    5.15
## 5 Control group   5 13.58 13.87    3.43
## 6 Control group   6 12.83 11.86    5.29

2. reshape data (doi cac thoi diem khac nhau vao chung mot cot)

#dinh nghia nhom (define for group)


# Reshape the data from wide to long format as follows

library(reshape2)

## Warning: package 'reshape2' was built under R version 3.2.5

# Reshape from wide to long: the measurement columns a1, a2 and latency are
# stacked into a single `value` column, and the name of the originating column
# is stored in `time`. `group` and `stt` are kept as identifier columns.
a <- melt(data,
          id.vars = c("group", "stt"),
          measure.vars = c("a1", "a2", "latency"),
          variable.name = "time")

# Make sure both classification columns are factors.
a$group <- as.factor(a$group)
a$time <- as.factor(a$time)

# Review the new dataset
head(a)

##           group stt time value
## 1 Control group   1   a1 12.04
## 2 Control group   2   a1 12.58
## 3 Control group   3   a1 11.32
## 4 Control group   4   a1 13.22
## 5 Control group   5   a1 13.58
## 6 Control group   6   a1 12.83

3. Code duoi khong thay doi (lines 40-175: no change)

## Gives count, mean, standard deviation, standard error of the mean, and
## confidence interval (default 95%) for every combination of grouping variables.
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summarized
##   groupvars: a vector containing names of columns that contain grouping variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
##   .drop: passed through to plyr::ddply
## Returns a data frame with one row per group and the columns
##   <groupvars>, N, <measurevar> (the group mean), sd, se, ci.
## plyr is called through its namespace instead of being attached with
## library(), so the function neither alters the search path nor picks up a
## different rename() from a package that was attached later.
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {

    # Version of length() which can handle NA's: if na.rm is TRUE, don't count them
    length2 <- function(x, na.rm=FALSE) {
        if (na.rm) sum(!is.na(x)) else length(x)
    }

    # For each group's data frame, return a vector with N, mean, and sd.
    # `measurevar` is forwarded by ddply as the `col` argument of .fun.
    datac <- plyr::ddply(data, groupvars, .drop=.drop,
      .fun = function(xx, col) {
        c(N    = length2(xx[[col]], na.rm=na.rm),
          mean = mean   (xx[[col]], na.rm=na.rm),
          sd   = sd     (xx[[col]], na.rm=na.rm)
        )
      },
      measurevar
    )

    # Name the "mean" column after the measured variable
    datac <- plyr::rename(datac, c("mean" = measurevar))

    # Standard error of the mean
    datac$se <- datac$sd / sqrt(datac$N)

    # Confidence interval multiplier for the standard error: the t quantile,
    # e.g. if conf.interval is .95, use .975 (above/below), with df = N - 1.
    # Groups with fewer than two observations have no valid df; their
    # multiplier is left NA instead of calling qt() with df <= 0, which
    # would only produce NaN plus a warning.
    ciMult <- rep(NA_real_, nrow(datac))
    enough <- !is.na(datac$N) & datac$N > 1
    ciMult[enough] <- qt(conf.interval/2 + .5, datac$N[enough] - 1)
    datac$ci <- datac$se * ciMult

    return(datac)
}
## Norms the data in a data frame: for every subject (identified by idvar,
## within the groups given by betweenvars) the subject's own mean is subtracted
## from each observation and the overall mean of measurevar across all rows is
## added back, which removes between-subject differences in level.
##   data: a data frame.
##   idvar: the name of a column that identifies each subject (or matched subjects)
##   measurevar: the name of a column that contains the variable to be summarized
##   betweenvars: a vector containing names of columns that are between-subjects variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   .drop: passed through to plyr::ddply
## Returns the merged data with an extra column named "<measurevar>_norm".
## The rows pass through merge(), so their order may differ from the input.
normDataWithin <- function(data=NULL, idvar, measurevar, betweenvars=NULL,
                           na.rm=FALSE, .drop=TRUE) {

    # Mean of measurevar per subject (and between-subjects cell).
    # Columns are extracted with [[ ]] so the result is always a plain vector,
    # also for data frame classes whose `[` never drops to a vector.
    data.subjMean <- plyr::ddply(data, c(idvar, betweenvars), .drop=.drop,
      .fun = function(xx, col, na.rm) {
        c(subjMean = mean(xx[[col]], na.rm=na.rm))
      },
      measurevar,
      na.rm
    )

    # Put the subject means with the original data (joined on the shared columns)
    data <- merge(data, data.subjMean)

    # Get the normalized data in a new column
    measureNormedVar <- paste0(measurevar, "_norm")
    data[[measureNormedVar]] <- data[[measurevar]] - data[["subjMean"]] +
                                mean(data[[measurevar]], na.rm=na.rm)

    # Remove the helper column again
    data$subjMean <- NULL

    return(data)
}
## Summarizes data, handling within-subjects variables by removing inter-subject variability.
## Gives count, un-normed mean, normed mean (with same between-group mean),
##   standard deviation, standard error of the mean, and confidence interval.
## If there are within-subject variables, sd/se/ci are computed from the normed
## data and adjusted using the method from Morey (2008).
## If there are no within-subject conditions (no withinvars, or a single level),
## the correction factor sqrt(M / (M - 1)) is undefined; in that case the
## ordinary sd/se/ci of the un-normed data are returned instead, in the same
## output columns.
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be summarized
##   betweenvars: a vector containing names of columns that are between-subjects variables
##   withinvars: a vector containing names of columns that are within-subjects variables
##   idvar: the name of a column that identifies each subject (or matched subjects)
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval (default is 95%)
##   .drop: passed through to summarySE and normDataWithin
## Returns a data frame with the columns
##   <betweenvars>, <withinvars>, N, <measurevar>, <measurevar>_norm, sd, se, ci.
summarySEwithin <- function(data=NULL, measurevar, betweenvars=NULL, withinvars=NULL,
                            idvar=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) {

  groupvars  <- c(betweenvars, withinvars)
  spreadvars <- c("sd", "se", "ci")

  # Ensure that the betweenvars and withinvars are factors
  factorvars <- vapply(data[, groupvars, drop=FALSE],
    FUN=is.factor, FUN.VALUE=logical(1))

  if (!all(factorvars)) {
    nonfactorvars <- names(factorvars)[!factorvars]
    message("Automatically converting the following non-factors to factors: ",
            paste(nonfactorvars, collapse = ", "))
    data[nonfactorvars] <- lapply(data[nonfactorvars], factor)
  }

  # Summary of the un-normed data; its means are always reported
  unnormed <- summarySE(data, measurevar, groupvars=groupvars,
                        na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

  # Keep only the group keys, N and the un-normed mean here; the spread
  # columns are taken from the normed data (or restored below)
  datac <- unnormed[, setdiff(names(unnormed), spreadvars), drop=FALSE]

  # Norm each subject's data
  ndata <- normDataWithin(data, idvar, measurevar, betweenvars, na.rm, .drop=.drop)

  # This is the name of the new column
  measurevar_n <- paste0(measurevar, "_norm")

  # Collapse the normed data - now we can treat between and within vars the same
  ndatac <- summarySE(ndata, measurevar_n, groupvars=groupvars,
                      na.rm=na.rm, conf.interval=conf.interval, .drop=.drop)

  # Product of the number of conditions of the within-S variables
  # (prod() of an empty set is 1, i.e. "no within-subject conditions")
  nWithinGroups <- prod(vapply(ndatac[, withinvars, drop=FALSE], FUN=nlevels,
                               FUN.VALUE=numeric(1)))

  if (nWithinGroups > 1) {
    # Apply correction from Morey (2008) to sd, se and the confidence interval
    correctionFactor <- sqrt( nWithinGroups / (nWithinGroups-1) )
    ndatac$sd <- ndatac$sd * correctionFactor
    ndatac$se <- ndatac$se * correctionFactor
    ndatac$ci <- ndatac$ci * correctionFactor
  } else {
    # No within-subject conditions: the correction would divide by zero.
    # Report the spread of the un-normed data instead.
    keys   <- c(groupvars, "N")
    ndatac <- merge(ndatac[, c(keys, measurevar_n), drop=FALSE],
                    unnormed[, c(keys, spreadvars), drop=FALSE])
  }

  # Combine the un-normed means with the normed results
  merge(datac, ndatac)
}

4. Mo ta so lieu o cac thoi diem khac nhau

# Descriptive statistics of `value` for every time x group cell
# (N, mean, sd, se and confidence interval, as returned by summarySE).
b <- summarySE(
  a,
  measurevar = "value",
  groupvars = c("time", "group")
)

## Warning: package 'plyr' was built under R version 3.2.5

# Show the resulting summary table
b

##      time                      group  N     value        sd        se
## 1      a1              Control group 12 12.675833 0.8605015 0.2484054
## 2      a1   Susceptible group (?100)  9  6.307778 1.2266089 0.4088696
## 3      a1 Unsusceptible group (?100)  3  6.890000 0.2882707 0.1664332
## 4      a2              Control group 12 12.752500 0.6471212 0.1868078
## 5      a2   Susceptible group (?100)  9  6.175556 1.2474886 0.4158295
## 6      a2 Unsusceptible group (?100)  3  6.743333 0.6313742 0.3645240
## 7 latency              Control group 12  6.210000 1.4851813 0.4287349
## 8 latency   Susceptible group (?100)  9  8.572222 5.0956888 1.6985629
## 9 latency Unsusceptible group (?100)  3 11.770000 5.4932049 3.1715033
##           ci
## 1  0.5467366
## 2  0.9428551
## 3  0.7161041
## 4  0.4111612
## 5  0.9589046
## 6  1.5684204
## 7  0.9436392
## 8  3.9168932
## 9 13.6458774

5. Ve bieu do

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.2.5

# Dodge specification with a width of 0.1; the boxplot call below does not
# reference it.
pd <- position_dodge(width = 0.1)

# Box plot of the measured value at each time point, one box per group.
ggplot(data = a, mapping = aes(x = time, y = value, fill = group)) +
  geom_boxplot()

Bieu do hop

Thang, TB

30 May 2017

1. Read data

2. reshape data (doi cac thoi diem khac nhau vao chung mot cot)

3. Code duoi khong thay doi (lines 40-175: no change)

4. Mo ta so lieu o cac thoi diem khac nhau

5. Ve bieu do