1 Library Loading

library(tidyverse)
library("readxl")
library("writexl")
library(tidyr)
library(dplyr)
library(lubridate)

2 Data Wrangling

2.1 App usage analysis

2.1.1 Total usage time

data38id <- data38id %>% arrange(desc(data38id$TIME)) %>% mutate(id = rownames(data38id))

usagetime <- Aug30_usagetime %>% group_by(UserID,ActiveDate) %>% summarise(totalTime = sum(`Usage Time (minute)`))
usagetime_w <- usagetime %>% mutate(week = week(ActiveDate)) 

2.1.2 number of login

login <- usagetime_w %>% group_by(UserID,week) %>% summarise(count = n())
login_wide <- spread(login, week, count) # 45 user 
login_wide <- merge(data38id,login_wide) # 38 user

2.1.3 total time

totaltime <- usagetime_w %>% group_by(UserID,week) %>% summarise(totaltime = sum(totalTime))
totaltime_wide <- spread(totaltime, week, totaltime)
totaltime_wide <- merge(data38id,totaltime_wide)

2.1.4 average time

avgtime <- usagetime_w %>% group_by(UserID,week) %>% summarise(avg = mean(totalTime))
avgtime_wide <- spread(avgtime, week, avg)
avgtime_wide <- merge(data38id,avgtime_wide)

2.2 Usage analysis split by app features

usagetime1 <- Aug30_usagetime
usagetime2 <- spread(usagetime1, Feature,'Usage Time (minute)')

2.2.1 total time

feature_1 <- usagetime2 %>% group_by(UserID,ActiveDate,week) %>% summarise(time=sum(discovery))
feature_1_w <- feature_1 %>% group_by(UserID,week) %>% summarise(timefeature_1 = sum(time,na.rm = T))
feature_1_s <- spread(feature_1_w, week, timefeature_1)
feature_1_m <- merge(data38id,feature_1_s)

head(feature_1_m,5) %>%knitr::kable()
UserID TIME id 26 27 28 29 30 31 32 33 34
0d6ddcac-bd09-44e3-908a-f89936588ab9 62.60 30 NA NA NA NA 3.3 0 NA 0.00 0
122557e2-637e-4c16-a515-ffab97ac2c9b 222.62 13 NA NA NA NA NA NA 32.77 NA NA
12fd4355-0eb5-4ca5-948a-549c9bdf90b0 223.63 12 NA 3.18 1.47 0.00 NA NA 0.00 0.00 NA
17835b88-a26e-439d-8922-d2c80ab99948 53.50 31 NA NA NA 6.17 NA 0 NA NA NA
2227d10d-ac61-4d2d-a3a1-aa21bdc24147 31.46 35 NA 0.00 NA NA NA NA 0.00 5.98 NA
feature_2 <- usagetime2 %>% group_by(UserID,ActiveDate,week) %>% summarise(time=sum(entertainment))
feature_2_w <- feature_2 %>% group_by(UserID,week) %>% summarise(timefeature_2 = sum(time,na.rm = T)) 
feature_2_s <- spread(feature_2_w, week, timefeature_2)
feature_2_m <- merge(data38id,feature_2_s)

head(feature_2_m,5) %>%knitr::kable()
UserID TIME id 26 27 28 29 30 31 32 33 34
0d6ddcac-bd09-44e3-908a-f89936588ab9 62.60 30 NA NA NA NA 8.55 1.27 NA 0.63 0.45
122557e2-637e-4c16-a515-ffab97ac2c9b 222.62 13 NA NA NA NA NA NA 135.15 NA NA
12fd4355-0eb5-4ca5-948a-549c9bdf90b0 223.63 12 NA 0.1 5.18 7.53 NA NA 7.68 4.75 NA
17835b88-a26e-439d-8922-d2c80ab99948 53.50 31 NA NA NA 6.98 NA 3.00 NA NA NA
2227d10d-ac61-4d2d-a3a1-aa21bdc24147 31.46 35 NA 0.0 NA NA NA NA 0.00 0.00 NA
feature_3 <- usagetime2 %>% group_by(UserID,ActiveDate,week) %>% summarise(time=sum(learn))
feature_3_w <- feature_3 %>% group_by(UserID,week) %>% summarise(timefeature_3 = sum(time,na.rm = T)) 
feature_3_s <- spread(feature_3_w, week, timefeature_3)
feature_3_m <- merge(data38id,feature_3_s)

head(feature_3_m,5) %>%knitr::kable()
UserID TIME id 26 27 28 29 30 31 32 33 34
0d6ddcac-bd09-44e3-908a-f89936588ab9 62.60 30 NA NA NA NA 19.12 1.63 NA 11.37 16.28
122557e2-637e-4c16-a515-ffab97ac2c9b 222.62 13 NA NA NA NA NA NA 54.70 NA NA
12fd4355-0eb5-4ca5-948a-549c9bdf90b0 223.63 12 NA 62.43 17.68 18.38 NA NA 47.68 47.57 NA
17835b88-a26e-439d-8922-d2c80ab99948 53.50 31 NA NA NA 25.43 NA 11.92 NA NA NA
2227d10d-ac61-4d2d-a3a1-aa21bdc24147 31.46 35 NA 3.95 NA NA NA NA 18.20 3.33 NA

2.2.2 number of login

feature_3$time[is.na(feature_3$time)] <- 0
feature_3_n <- feature_3 %>% filter(time > 0) %>% group_by(UserID,week) %>% summarise(n = n())

feature_2$time[is.na(feature_2$time)] <- 0
feature_2_n <- feature_2 %>% filter(time > 0) %>% group_by(UserID,week) %>% summarise(n = n())

feature_1$time[is.na(feature_1$time)] <- 0
feature_1_n <- feature_1 %>% filter(time > 0) %>% group_by(UserID,week) %>% summarise(n = n())

2.2.3 avg time

feature_1_avg <- feature_1 %>% group_by(UserID,week) %>% summarise(timefeature_1 = mean(time,na.rm = T)) 
feature_1_avgs <- spread(feature_1_avg, week, timefeature_1)
feature_1_avgm <- merge(data38id,feature_1_avgs)


feature_2_avg <- feature_2 %>% group_by(UserID,week) %>% summarise(timefeature_2 = mean(time,na.rm = T)) 
feature_2_avgs <- spread(feature_2_avg, week, timefeature_2)
feature_2_avgm <- merge(data38id,feature_2_avgs)


feature_3_avg <- feature_3 %>% group_by(UserID,week) %>% summarise(timefeature_3 = mean(time,na.rm = T)) 
feature_3_avgs <- spread(feature_3_avg, week, timefeature_3)
feature_3_avgm <- merge(data38id,feature_3_avgs)
head(feature_3_avgm,5) %>%knitr::kable()
UserID TIME id 26 27 28 29 30 31 32 33 34
0d6ddcac-bd09-44e3-908a-f89936588ab9 62.60 30 NA NA NA NA 19.12 1.63 NA 11.370 16.28
122557e2-637e-4c16-a515-ffab97ac2c9b 222.62 13 NA NA NA NA NA NA 10.94 NA NA
12fd4355-0eb5-4ca5-948a-549c9bdf90b0 223.63 12 NA 20.81 17.68 18.38 NA NA 23.84 23.785 NA
17835b88-a26e-439d-8922-d2c80ab99948 53.50 31 NA NA NA 25.43 NA 5.96 NA NA NA
2227d10d-ac61-4d2d-a3a1-aa21bdc24147 31.46 35 NA 3.95 NA NA NA NA 18.20 3.330 NA