# Packages this script relies on: dplyr provides %>%, group_by(), summarise()
# and n(); ggplot2 provides the plots; rpivotTable provides the pivot widget.
library(dplyr)
library(ggplot2)
library(rpivotTable)

# Load the raw commit records.
mydat <- read.csv("Commit_Record_1062_original.csv")

# Combine the date and time columns into one POSIXct timestamp.
# The candidate formats are all day-first, with "-" or "/" as the date
# separator, and go from most to least detailed (seconds, minutes, date only).
# They are tried in the order given.
mydat$combinedTime <- as.POSIXct(
  paste(mydat$date, mydat$time, sep = " "),
  tryFormats = c(
    "%d-%m-%Y %H:%M:%OS",
    "%d/%m/%Y %H:%M:%OS",
    "%d-%m-%Y %H:%M",
    "%d/%m/%Y %H:%M",
    "%d-%m-%Y",
    "%d/%m/%Y"
  )
)
# Recode each commit's result label into an ordinal score, kept as text:
# NB -> "1", CPF -> "2", CTF -> "3", CSF -> "4", S -> "5".
# Labels that are not in the table (and NA) are left unchanged.
color.score <- c(NB = "1", CPF = "2", CTF = "3", CSF = "4", S = "5")
colorNum <- as.character(mydat$color)
is.known <- colorNum %in% names(color.score)
colorNum[is.known] <- unname(color.score[colorNum[is.known]])

# Homework number = first run of digits in the homework label, e.g.
# "OOP-HW4" -> 4. Reading the whole run (rather than a single character at a
# fixed position) keeps multi-digit homework numbers such as "OOP-HW10" intact.
hwNum <- as.numeric(
  sub("^[^0-9]*([0-9]+).*$", "\\1", as.character(mydat$hw))
)
# Attach the recoded score and the homework number to the commit table.
mydat <- data.frame(mydat, colorNum, hwNum)

# Per homework and student: time between the first and the last commit, and
# the number of commits. The unit is pinned to hours; a bare `max - min`
# lets R choose the unit automatically for each group, which makes the
# numeric values plotted below hard to interpret.
commit.time <- mydat %>%
  group_by(hw, stuId) %>%
  summarise(
    total_time = difftime(max(combinedTime), min(combinedTime),
                          units = "hours"),
    commit.count = n()
  ) %>%
  ungroup()

boxplot(as.numeric(total_time) ~ hw, data = commit.time,
        main = "Working duration of student on homeworks",
        ylab = "hours")
boxplot(as.numeric(commit.count) ~ hw, data = commit.time,
        main = "Number of commits on homeworks")
# Duration a student stays in each result state of a homework: time between
# the first and the last commit carrying that score, plus the commit count.
# The unit is pinned to hours so every group is measured on the same scale.
dur.in.color <- mydat %>%
  group_by(hw, stuId, colorNum) %>%
  summarise(
    total_time = difftime(max(combinedTime), min(combinedTime),
                          units = "hours"),
    commit.count = n()
  ) %>%
  ungroup()

# The plot is about durations, so it shows total_time (not commit.count).
boxplot(as.numeric(total_time) ~ colorNum, data = dur.in.color,
        main = "Duration of idle in a fail homeworks",
        ylab = "hours")
#### Commit time vs Result, all homeworks
# One line (plus points) per student: commit timestamp on x, result score
# on y. Colours are the palette indices 1..n, one per student; n is taken
# from the data so the scale never runs out of values.
n.students <- length(unique(mydat$stuId))
a <- ggplot(mydat, aes(x = combinedTime, y = colorNum,
                       col = as.factor(stuId), group = as.factor(stuId))) +
  geom_line() +
  geom_point() +
  scale_colour_manual(values = seq_len(n.students))
a + labs(x = NULL, y = NULL, title = "Commit time vs Result All HWs")
# Same plot restricted to the commits of homework "OOP-HW4".
hw4 <- subset(mydat, hw == "OOP-HW4")
# One palette index per student present in this subset, instead of a fixed
# count.
n.hw4.students <- length(unique(hw4$stuId))
a <- ggplot(hw4, aes(x = combinedTime, y = colorNum,
                     col = as.factor(stuId), group = as.factor(stuId))) +
  geom_line() +
  geom_point() +
  scale_colour_manual(values = seq_len(n.hw4.students))
a + labs(x = NULL, y = NULL, title = "Commit time vs Result HW4")
# NOTE(review): despite its name, `hw4` is re-bound here to the commits of
# "OOP-HW6"; the variable names are kept as they were.
hw4 <- subset(mydat, hw == "OOP-HW6")
# A student is "not successful" on this homework when none of their commits
# reached score 5.
ok.stud <- subset(hw4, colorNum == 5) %>% unique()
hw4.NotSucc <- subset(hw4, !(stuId %in% ok.stud$stuId))
# stuId is mapped as a factor, like in the other commit-time plots, so each
# student gets a discrete colour.
a <- ggplot(hw4.NotSucc, aes(x = combinedTime, y = colorNum,
                             col = as.factor(stuId),
                             group = as.factor(stuId))) +
  geom_line() +
  geom_point()
a + labs(x = NULL, y = NULL, title = "Commit time vs Result")
# Distinct (homework, student, timestamp, score) combinations.
# NOTE(review): the row-to-row comparison below relies on summarise()
# returning rows ordered by the grouping keys, so that one student's commits
# on one homework are adjacent and in time order - confirm for the dplyr
# version in use.
group.time <- mydat %>%
  group_by(hwNum, stuId, combinedTime, colorNum) %>%
  summarise() %>%
  ungroup()

# Compare every row (from the second on) with the row just before it.
n.rows <- nrow(group.time)
cur <- seq_len(n.rows)[-1]
prev <- cur - 1L

# A fail-to-success switch: same homework and student as the previous row,
# a different score, and the new score is 5 (successful).
is.switch <- group.time$hwNum[cur] == group.time$hwNum[prev] &
  group.time$stuId[cur] == group.time$stuId[prev] &
  group.time$colorNum[cur] != group.time$colorNum[prev] &
  group.time$colorNum[cur] == 5
# Rows with missing keys cannot be classified; treat them as "no switch".
is.switch <- !is.na(is.switch) & is.switch
hit <- cur[is.switch]

# Hours between the last failing commit and the successful one; 0 for every
# row that is not such a switch.
time.to.sucess <- numeric(n.rows)
time.to.sucess[hit] <- as.numeric(difftime(
  group.time$combinedTime[hit],
  group.time$combinedTime[hit - 1L],
  units = "hours"
))
group.time$time.to.succ <- time.to.sucess
fail.2.succ <- subset(group.time, time.to.succ != 0)

# Duration from last fail to success of students
if (nrow(fail.2.succ) > 0) {
  boxplot(time.to.succ ~ stuId, data = fail.2.succ,
          main = "Duration from last fail to success of students",
          ylab = "hours")
}
#### Interactive pivot table of the fail-to-success durations
# as_tibble() replaces the deprecated tbl_df().
fail.2.succ %>%
  as_tibble() %>%
  rpivotTable()
# Commits per hour of day, split by result score.
# combinedTime is already POSIXct, so the two-digit hour can be formatted
# from it directly.
mydat$commit.hour <- format(mydat$combinedTime, "%H")
commit.by.hour <- summarise(
  group_by(mydat, commit.hour, colorNum),
  color.count = n()
)
# Stacked bars: one bar per hour, one segment per score.
g <- ggplot(data = commit.by.hour,
            mapping = aes(x = commit.hour, y = color.count))
g + geom_bar(mapping = aes(fill = colorNum), stat = "identity")