require(ggplot2)
require(ggthemes)
require(dplyr)
require(tidyr)
require(lubridate)
require(stats)

## Opening and consolidating dataset

## Input file to analyse
filename <- "flf.txt"

## Project name: the first piece of the file name when split on "/" or "."
project <- strsplit(filename, "/|\\.")[[1]][1]

## Companion file name, built from the project name
filename_err <- paste0(project, "_err.txt")

## Some file names contain commas, which break the read.csv function. To work around this, the file is read as plain text lines with semicolon-separated fields, the commas are turned into dashes, the semicolons are turned into commas, and the result is saved as a csv file. This extra step guarantees that every file can be read.

## Read each input file once. readLines() already strips the line terminators,
## so only the separators need rewriting: literal commas become dashes first,
## then the semicolon field separators become commas. gsub() is vectorized, so
## no per-line lapply() is required.
data_lines <- gsub(",", "-", readLines(filename), fixed = TRUE)
csv_lines <- gsub(";", ",", data_lines, fixed = TRUE)

## Same conversion for the companion file; a header row is prepended to its
## lines (presumably it carries none of its own -- confirm against the data).
data_lines_err <- gsub(",", "-", readLines(filename_err), fixed = TRUE)
csv_lines_err <- c(
  "fileId,fileName,mimeType,createdTime,ownerName,modifiedTime,lastModName",
  gsub(";", ",", data_lines_err, fixed = TRUE)
)

## Save the converted lines as csv files named after the project,
## one vector element per output line
write(csv_lines, file = paste0(project, ".csv"), ncolumns = 1)
write(csv_lines_err, file = paste0(project, "_err.csv"), ncolumns = 1)

## Re-reading as csv
## Empty strings in the name columns are recoded as NA so that they count as
## missing values further down the script.
edits <- as_tibble(read.csv(
  paste0(project, ".csv"),
  fill = TRUE,
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8"
))
edits <- edits %>%
  mutate(
    ownerName = ifelse(ownerName == "", NA, ownerName),
    # Keep the last modifier's own name (previously overwritten by ownerName)
    lastModName = ifelse(lastModName == "", NA, lastModName)
  )

noedits <- as_tibble(read.csv(
  paste0(project, "_err.csv"),
  fill = TRUE,
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8"
))
noedits <- noedits %>%
  mutate(ownerName = ifelse(ownerName == "", NA, ownerName))

## Stack both sources and keep only the rows whose createdTime parses as a date
raw_data <- rbind(edits, noedits)
raw_data <- raw_data[!is.na(as.Date(raw_data$createdTime)), ]
## Number of edits per user per day, plus a running total for each user.
contributors <-
  raw_data %>%
  mutate(
    # A row without a modification time is dated by its creation time
    datetime = date(ifelse(is.na(modifiedTime), createdTime, modifiedTime)),
    # Credit the last modifier, falling back to the owner; rows with neither
    # are pooled under a placeholder user
    user = ifelse(is.na(lastModName), ownerName, lastModName),
    user = ifelse(is.na(user), "*NON-LOGGED", toupper(user))
  ) %>%
  group_by(user, datetime) %>%
  # n() counts the rows in each user/day group; n_distinct(datetime) was
  # always 1 here because datetime is itself a grouping key
  summarize(count = n()) %>%
  # Regroup by user and order by day so the running total is per user
  group_by(user) %>%
  arrange(user, datetime) %>%
  mutate(cumsum = cumsum(count))

## Users ordered by their total number of edits, largest first
contributor_ranking <- contributors %>%
  group_by(user) %>%
  summarize(total = sum(count)) %>%
  arrange(desc(total))
  
# png(
#   paste0(filename," ",format(now(), "%Y-%m-%d %I%M%p")," [01].png"), 
#   width=20, 
#   height=15, 
#   units="cm", 
#   res=500)

## Cumulative edits over time, one line per participant. The colour legend is
## hidden; the x axis is labelled monthly with vertical tick text.
m <- ggplot(contributors, aes(x = datetime, y = cumsum, color = user)) +
  geom_line(alpha = 0.5) +
  theme_classic() +
  scale_x_date(date_breaks = "1 month", date_labels = "%m %Y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  guides(color = "none") +
  labs(
    title = paste(project, "-", "Timeline of edits by participant"),
    subtitle = paste("Filename:", filename),
    x = "Date",
    y = "Edits"
  )

m

# dev.off()

## Per-day totals: number of rows (edits) and number of distinct users
## (editors), sorted with the busiest day first.
data <-
  raw_data %>%
  mutate(
    # Date of the modification, or of the creation when there is none
    datetime = date(ifelse(is.na(modifiedTime), createdTime, modifiedTime)),
    # Last modifier, falling back to the owner
    user = ifelse(is.na(lastModName), ownerName, lastModName)
  ) %>%
  group_by(datetime) %>%
  summarize(
    edits = n(),
    editors = n_distinct(user)
  ) %>%
  arrange(desc(edits))

## Aggregate the daily figures by "%Y-%m" bucket. Despite the "week" names the
## buckets are calendar months. week_editors adds up the daily distinct-editor
## counts, so a user active on several days is counted once per day.
weekly <- data %>%
  mutate(datetime = format(datetime, "%Y-%m")) %>%
  group_by(datetime) %>%
  summarize(
    week_edits = sum(edits),
    week_editors = sum(editors)
  )

## Long format: one row per bucket and measure, for plotting
gathered_weekly <- weekly %>%
  gather(key = type, value = amount, week_edits, week_editors)

# png(
#   paste0(filename," ",format(now(), "%Y-%m-%d %I%M%p")," [02].png"), 
#   width=20, 
#   height=15, 
#   units="cm", 
#   res=500)

## Edits and editors per time bucket, drawn as points joined by one line per
## measure, with small grey axis text.
k <- ggplot(gathered_weekly, aes(x = datetime, y = amount, color = type)) +
  geom_point() +
  geom_line(aes(group = type)) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, size = 8, color = "#888888"),
    axis.text.y = element_text(size = 8, color = "#888888")
  ) +
  labs(
    title = paste(project, "-", "Edits and editors over time"),
    subtitle = paste("Filename:", filename),
    x = "Date",
    y = "Amount"
  )

k

# dev.off()