library(tidyverse)
library("readxl")
library("writexl")
library(tidyr)
library(dplyr)
library(lubridate)data38id <- data38id %>% arrange(desc(data38id$TIME)) %>% mutate(id = rownames(data38id))
usagetime <- Aug30_usagetime %>% group_by(UserID,ActiveDate) %>% summarise(totalTime = sum(`Usage Time (minute)`))
usagetime_w <- usagetime %>% mutate(week = week(ActiveDate)) login <- usagetime_w %>% group_by(UserID,week) %>% summarise(count = n())
login_wide <- spread(login, week, count) # 45 user
login_wide <- merge(data38id,login_wide) # 38 usertotaltime <- usagetime_w %>% group_by(UserID,week) %>% summarise(totaltime = sum(totalTime))
totaltime_wide <- spread(totaltime, week, totaltime)
totaltime_wide <- merge(data38id,totaltime_wide)avgtime <- usagetime_w %>% group_by(UserID,week) %>% summarise(avg = mean(totalTime))
avgtime_wide <- spread(avgtime, week, avg)
avgtime_wide <- merge(data38id,avgtime_wide)usagetime1 <- Aug30_usagetime
usagetime2 <- spread(usagetime1, Feature,'Usage Time (minute)')feature_1 <- usagetime2 %>% group_by(UserID,ActiveDate,week) %>% summarise(time=sum(discovery))
feature_1_w <- feature_1 %>% group_by(UserID,week) %>% summarise(timefeature_1 = sum(time,na.rm = T))
feature_1_s <- spread(feature_1_w, week, timefeature_1)
feature_1_m <- merge(data38id,feature_1_s)
head(feature_1_m,5) %>%knitr::kable()| UserID | TIME | id | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0d6ddcac-bd09-44e3-908a-f89936588ab9 | 62.60 | 30 | NA | NA | NA | NA | 3.3 | 0 | NA | 0.00 | 0 |
| 122557e2-637e-4c16-a515-ffab97ac2c9b | 222.62 | 13 | NA | NA | NA | NA | NA | NA | 32.77 | NA | NA |
| 12fd4355-0eb5-4ca5-948a-549c9bdf90b0 | 223.63 | 12 | NA | 3.18 | 1.47 | 0.00 | NA | NA | 0.00 | 0.00 | NA |
| 17835b88-a26e-439d-8922-d2c80ab99948 | 53.50 | 31 | NA | NA | NA | 6.17 | NA | 0 | NA | NA | NA |
| 2227d10d-ac61-4d2d-a3a1-aa21bdc24147 | 31.46 | 35 | NA | 0.00 | NA | NA | NA | NA | 0.00 | 5.98 | NA |
feature_2 <- usagetime2 %>% group_by(UserID,ActiveDate,week) %>% summarise(time=sum(entertainment))
feature_2_w <- feature_2 %>% group_by(UserID,week) %>% summarise(timefeature_2 = sum(time,na.rm = T))
feature_2_s <- spread(feature_2_w, week, timefeature_2)
feature_2_m <- merge(data38id,feature_2_s)
head(feature_2_m,5) %>%knitr::kable()| UserID | TIME | id | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0d6ddcac-bd09-44e3-908a-f89936588ab9 | 62.60 | 30 | NA | NA | NA | NA | 8.55 | 1.27 | NA | 0.63 | 0.45 |
| 122557e2-637e-4c16-a515-ffab97ac2c9b | 222.62 | 13 | NA | NA | NA | NA | NA | NA | 135.15 | NA | NA |
| 12fd4355-0eb5-4ca5-948a-549c9bdf90b0 | 223.63 | 12 | NA | 0.1 | 5.18 | 7.53 | NA | NA | 7.68 | 4.75 | NA |
| 17835b88-a26e-439d-8922-d2c80ab99948 | 53.50 | 31 | NA | NA | NA | 6.98 | NA | 3.00 | NA | NA | NA |
| 2227d10d-ac61-4d2d-a3a1-aa21bdc24147 | 31.46 | 35 | NA | 0.0 | NA | NA | NA | NA | 0.00 | 0.00 | NA |
feature_3 <- usagetime2 %>% group_by(UserID,ActiveDate,week) %>% summarise(time=sum(learn))
feature_3_w <- feature_3 %>% group_by(UserID,week) %>% summarise(timefeature_3 = sum(time,na.rm = T))
feature_3_s <- spread(feature_3_w, week, timefeature_3)
feature_3_m <- merge(data38id,feature_3_s)
head(feature_3_m,5) %>%knitr::kable()| UserID | TIME | id | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0d6ddcac-bd09-44e3-908a-f89936588ab9 | 62.60 | 30 | NA | NA | NA | NA | 19.12 | 1.63 | NA | 11.37 | 16.28 |
| 122557e2-637e-4c16-a515-ffab97ac2c9b | 222.62 | 13 | NA | NA | NA | NA | NA | NA | 54.70 | NA | NA |
| 12fd4355-0eb5-4ca5-948a-549c9bdf90b0 | 223.63 | 12 | NA | 62.43 | 17.68 | 18.38 | NA | NA | 47.68 | 47.57 | NA |
| 17835b88-a26e-439d-8922-d2c80ab99948 | 53.50 | 31 | NA | NA | NA | 25.43 | NA | 11.92 | NA | NA | NA |
| 2227d10d-ac61-4d2d-a3a1-aa21bdc24147 | 31.46 | 35 | NA | 3.95 | NA | NA | NA | NA | 18.20 | 3.33 | NA |
feature_3$time[is.na(feature_3$time)] <- 0
feature_3_n <- feature_3 %>% filter(time > 0) %>% group_by(UserID,week) %>% summarise(n = n())
feature_2$time[is.na(feature_2$time)] <- 0
feature_2_n <- feature_2 %>% filter(time > 0) %>% group_by(UserID,week) %>% summarise(n = n())
feature_1$time[is.na(feature_1$time)] <- 0
feature_1_n <- feature_1 %>% filter(time > 0) %>% group_by(UserID,week) %>% summarise(n = n())feature_1_avg <- feature_1 %>% group_by(UserID,week) %>% summarise(timefeature_1 = mean(time,na.rm = T))
feature_1_avgs <- spread(feature_1_avg, week, timefeature_1)
feature_1_avgm <- merge(data38id,feature_1_avgs)
feature_2_avg <- feature_2 %>% group_by(UserID,week) %>% summarise(timefeature_2 = mean(time,na.rm = T))
feature_2_avgs <- spread(feature_2_avg, week, timefeature_2)
feature_2_avgm <- merge(data38id,feature_2_avgs)
feature_3_avg <- feature_3 %>% group_by(UserID,week) %>% summarise(timefeature_3 = mean(time,na.rm = T))
feature_3_avgs <- spread(feature_3_avg, week, timefeature_3)
feature_3_avgm <- merge(data38id,feature_3_avgs)
head(feature_3_avgm,5) %>%knitr::kable()| UserID | TIME | id | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0d6ddcac-bd09-44e3-908a-f89936588ab9 | 62.60 | 30 | NA | NA | NA | NA | 19.12 | 1.63 | NA | 11.370 | 16.28 |
| 122557e2-637e-4c16-a515-ffab97ac2c9b | 222.62 | 13 | NA | NA | NA | NA | NA | NA | 10.94 | NA | NA |
| 12fd4355-0eb5-4ca5-948a-549c9bdf90b0 | 223.63 | 12 | NA | 20.81 | 17.68 | 18.38 | NA | NA | 23.84 | 23.785 | NA |
| 17835b88-a26e-439d-8922-d2c80ab99948 | 53.50 | 31 | NA | NA | NA | 25.43 | NA | 5.96 | NA | NA | NA |
| 2227d10d-ac61-4d2d-a3a1-aa21bdc24147 | 31.46 | 35 | NA | 3.95 | NA | NA | NA | NA | 18.20 | 3.330 | NA |