Load data
mydat = read.csv("Commit_Record_1062_original.csv")
#combine date and time as POSIXct
mydat$combinedTime <- as.POSIXct(paste(mydat$date,mydat$time, sep=" "),
tryFormats = c("%d-%m-%Y %H:%M:%OS",
"%d/%m/%Y %H:%M:%OS",
"%d-%m-%Y %H:%M",
"%d/%m/%Y %H:%M",
"%d-%m-%Y",
"%d/%m/%Y" ))
Conver color code to number
colorNum= as.factor(mydat$color)
colorNum = as.character(colorNum)
hwNum = as.numeric(substr(mydat$hw,7,7))
colorNum[colorNum=="S"]<- 5
colorNum[colorNum=="CSF"]<- 4
colorNum[colorNum=="CTF"]<- 3
colorNum[colorNum=="CPF"]<- 2
colorNum[colorNum=="NB"]<- 1
mydat <-data.frame(mydat,colorNum, hwNum )
Try to group by homework and StudentID
commit.time <- mydat%>% group_by(hw,stuId) %>% summarise(total_time = max(combinedTime)- min(combinedTime), commit.count= n())
boxplot(as.numeric(total_time)~hw, data = commit.time, main= "Working duration of student on homeworks")

boxplot(as.numeric(commit.count)~hw, data = commit.time, main= "Number of commits on homeworks")

#duration stucking in a failure
dur.in.color <-mydat%>% group_by(hw,stuId,colorNum) %>% summarise(total_time = max(combinedTime)- min(combinedTime), commit.count= n())
boxplot(as.numeric(commit.count)~colorNum, data = dur.in.color, main= "Duration of idle in a fail homeworks")

Pivot Table of whole dataset
mydat %>% tbl_df() %>% rpivotTable()