Load data

# Read the raw commit log; its `date` and `time` columns are combined below.
mydat <- read.csv("Commit_Record_1062_original.csv")

# Build a single POSIXct timestamp from the separate date and time columns.
# The candidate formats are tried in the order listed: day-month-year with
# "-" or "/" separators, with seconds, without seconds, or date only.
stamp.formats <- c(
  "%d-%m-%Y %H:%M:%OS",
  "%d/%m/%Y %H:%M:%OS",
  "%d-%m-%Y %H:%M",
  "%d/%m/%Y %H:%M",
  "%d-%m-%Y",
  "%d/%m/%Y"
)
mydat$combinedTime <- as.POSIXct(
  paste(mydat$date, mydat$time, sep = " "),
  tryFormats = stamp.formats
)

Some preprocessing, summarizing and calculating

# Translate each result code in `color` into an ordinal score kept as text
# ("1" for NB up to "5" for S; 5 is treated as success further down).
# Codes that are not listed here are left unchanged.
color.scores <- c(NB = "1", CPF = "2", CTF = "3", CSF = "4", S = "5")
colorNum <- as.character(mydat$color)
is.known <- colorNum %in% names(color.scores)
colorNum[is.known] <- unname(color.scores[colorNum[is.known]])

# Homework number: all digits that directly follow "HW" in the homework label
# (e.g. "OOP-HW4" -> 4). Reading the whole digit run instead of one fixed
# character keeps two-digit homework numbers intact.
hwNum <- as.numeric(sub("^.*HW([0-9]+).*$", "\\1", mydat$hw))

mydat <- data.frame(mydat, colorNum, hwNum)

# Per homework and student: time between first and last commit, and the
# number of commits. The unit is fixed to hours because `max - min` on
# POSIXct values chooses a unit separately for every group.
commit.time <- mydat %>%
  group_by(hw, stuId) %>%
  summarise(
    total_time = difftime(
      max(combinedTime), min(combinedTime),
      units = "hours"
    ),
    commit.count = n()
  )

# Hour of day (two-digit text) of each commit.
mydat$commit.hour <- format(mydat$combinedTime, "%H")

Plot some durations

# Time between a student's first and last commit, per homework.
# as.numeric(..., units = "hours") converts the difftime to one fixed unit,
# whatever unit it is stored in.
boxplot(as.numeric(total_time, units = "hours") ~ hw,
        data = commit.time,
        main = "Working duration of student on homeworks",
        ylab = "Hours")

# Number of commits per student, per homework.
boxplot(as.numeric(commit.count) ~ hw,
        data = commit.time,
        main = "Number of commits on homeworks")

# Time a student spent in each result state: span between the first and the
# last commit carrying that result, per homework and student.
dur.in.color <- mydat %>%
  group_by(hw, stuId, colorNum) %>%
  summarise(
    total_time = difftime(
      max(combinedTime), min(combinedTime),
      units = "hours"
    ),
    commit.count = n()
  )

# Plot the duration (not the commit count) so the figure matches its title.
boxplot(as.numeric(total_time, units = "hours") ~ colorNum,
        data = dur.in.color,
        main = "Duration of idle in a fail homeworks",
        ylab = "Hours")

#### Commit time vs Result All homeworks

# One line per student across all homeworks: commit time on the x axis,
# result score on the y axis.
# The manual colour scale needs at least one value per student, so its length
# is taken from the data instead of being fixed at 84.
n.students <- nlevels(as.factor(mydat$stuId))
a <- ggplot(
  mydat,
  aes(
    x = combinedTime, y = colorNum,
    col = as.factor(stuId), group = as.factor(stuId)
  )
) +
  geom_line() +
  geom_point() +
  scale_colour_manual(values = seq_len(n.students))
a + labs(x = NULL, y = NULL, title = "Commit time vs Result All HWs")

Commit time vs Result Homework4

# Same plot as for all homeworks, restricted to the rows of "OOP-HW4".
hw4 <- subset(mydat, hw == "OOP-HW4")

# The manual colour scale needs at least one value per student in the plot,
# so its length is taken from the subset instead of being fixed at 84.
n.students.hw4 <- nlevels(as.factor(hw4$stuId))
a <- ggplot(
  hw4,
  aes(
    x = combinedTime, y = colorNum,
    col = as.factor(stuId), group = as.factor(stuId)
  )
) +
  geom_line() +
  geom_point() +
  scale_colour_manual(values = seq_len(n.students.hw4))
a + labs(x = NULL, y = NULL, title = "Commit time vs Result HW4")

Try to plot unsuccessful students
# NOTE(review): the variable is called `hw4`, but the rows selected here are
# those of "OOP-HW6" - confirm which homework is intended.
hw4 <- subset(mydat, hw == "OOP-HW6")

# A student counts as unsuccessful on this homework when none of their
# submissions received score 5.
ok.stud <- unique(subset(hw4, colorNum == 5))
hw4.NotSucc <- subset(hw4, !(stuId %in% ok.stud$stuId))

# Commit time vs result score, one line per unsuccessful student.
a <- ggplot(
  hw4.NotSucc,
  aes(x = combinedTime, y = colorNum, col = stuId, group = stuId)
) +
  geom_line() +
  geom_point()
a + labs(x = NULL, y = NULL, title = "Commit time vs Result")

Calculate duration from latest fail to success

# Distinct (homework, student, commit time, result) combinations. The rows
# come back ordered by the grouping columns, so the row before a commit is
# the previous commit of the same student on the same homework, if any.
group.time <- mydat %>%
  group_by(hwNum, stuId, combinedTime, colorNum) %>%
  summarise()

# Index of the preceding row for every row (NA for the first row).
n.rows <- nrow(group.time)
row.idx <- seq_len(n.rows)
prev.idx <- c(NA_integer_, row.idx)[row.idx]

# A row marks a fail-to-success transition when it has the same homework and
# student as the preceding row, a different result, and result 5 (success).
reached.success <- group.time$hwNum == group.time$hwNum[prev.idx] &
  group.time$stuId == group.time$stuId[prev.idx] &
  group.time$colorNum != group.time$colorNum[prev.idx] &
  group.time$colorNum == 5
# The first row and rows with missing values are not transitions.
reached.success[is.na(reached.success)] <- FALSE

# Gap to the preceding commit in one fixed unit (minutes). Plain subtraction
# of timestamps picks a unit per pair, which would mix seconds, minutes,
# hours and days in the same vector.
gap.mins <- as.numeric(difftime(
  group.time$combinedTime,
  group.time$combinedTime[prev.idx],
  units = "mins"
))

# Duration from the last failing commit to the successful one; 0 elsewhere.
time.to.sucess <- numeric(n.rows)
time.to.sucess[reached.success] <- gap.mins[reached.success]
group.time$time.to.succ <- time.to.sucess

# Keep only the rows that carry a fail-to-success duration.
fail.2.succ <- group.time %>% subset(time.to.succ != 0)

# Duration from last fail to success of students
boxplot(as.numeric(fail.2.succ$time.to.succ) ~ fail.2.succ$stuId,
        main = "Duration from last fail to success of students",
        ylab = "Minutes")

#### Try a pivot table on submission times

# Interactive pivot table over the fail-to-success rows.
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
fail.2.succ %>%
  as_tibble() %>%
  rpivotTable()

Plot commit times

# Hour of day (two-digit text) at which each commit was made.
mydat$commit.hour <- format(mydat$combinedTime, "%H")

# Number of commits for every (hour, result score) combination.
commit.by.hour <- mydat %>%
  group_by(commit.hour, colorNum) %>%
  summarise(color.count = n())

Stacked barchart for commit vs hour

# One bar per commit hour; bar height is the commit count and the fill
# colour separates the result scores within each bar.
g <- ggplot(
  data = commit.by.hour,
  mapping = aes(x = commit.hour, y = color.count)
)
g + geom_bar(mapping = aes(fill = colorNum), stat = "identity")