## Chapter 7 source code
# Load the daily-active-user (DAU) log from CSV.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned; strings are kept as character rather than factor.
dau <- read.csv("section7-dau.csv", header = TRUE, stringsAsFactors = FALSE)
# Preview the first rows of the log
head(dau)
## region_month region_day app_name user_id device
## 1 2013-01 2013-01-01 game-02 10061580 FP
## 2 2013-01 2013-01-01 game-02 10154440 FP
## 3 2013-01 2013-01-01 game-02 10164762 SP
## 4 2013-01 2013-01-01 game-02 10165615 FP
## 5 2013-01 2013-01-01 game-02 10321356 FP
## 6 2013-01 2013-01-01 game-02 10406653 SP
# Prepare the per-user tables used to tell whether a user migrated devices.
# Columns that identify one monthly-active-user (MAU) record
mau.cols <- c("region_month", "device", "user_id")
# MAU: distinct (month, device, user) combinations
mau <- unique(dau[, mau.cols])
# FP MAU: same, restricted to log rows with device "FP"
on.fp <- dau$device == "FP"
fp.mau <- unique(dau[on.fp, mau.cols])
# SP MAU: same, restricted to log rows with device "SP"
on.sp <- dau$device == "SP"
sp.mau <- unique(dau[on.sp, mau.cols])
# Split each device table into its January and February parts
fp.jan <- fp.mau$region_month == "2013-01"
fp.feb <- fp.mau$region_month == "2013-02"
sp.jan <- sp.mau$region_month == "2013-01"
sp.feb <- sp.mau$region_month == "2013-02"
fp.mau1 <- fp.mau[fp.jan, ]
fp.mau2 <- fp.mau[fp.feb, ]
sp.mau1 <- sp.mau[sp.jan, ]
sp.mau2 <- sp.mau[sp.feb, ]
# Did each January feature-phone (FP) user access the app at all in February?
# `mau` holds one row per (month, device, user), so a user who was active on
# both FP and SP in February appears twice in its February slice. The lookup is
# therefore reduced to one row per user before joining; without that, merge()
# fans such users out into duplicate rows that are then counted twice in every
# downstream table and in the model fit.
# NOTE(review): the console transcripts recorded further down in this file
# (e.g. the repeated user_id rows in the sorted listings and the 190/62 class
# counts) were produced before this de-duplication and will differ on re-run.
mau$is_access <- 1
feb.access <- unique(
  mau[mau$region_month == "2013-02", c("user_id", "is_access")]
)
# Left join: keep every January FP user, NA where there was no February access
fp.mau1 <- merge(fp.mau1, feb.access, by = "user_id", all.x = TRUE)
# Users without a February match did not access the app: flag them as 0
fp.mau1$is_access[is.na(fp.mau1$is_access)] <- 0
head(fp.mau1)
## user_id region_month device is_access
## 1 397286 2013-01 FP 1
## 2 471341 2013-01 FP 1
## 3 503874 2013-01 FP 0
## 4 512250 2013-01 FP 1
## 5 513811 2013-01 FP 1
## 6 638688 2013-01 FP 1
# Did each January FP user also use FP in February?
# Mark every February FP user, left-join that flag onto the January FP users,
# and turn the unmatched (NA) entries into 0.
fp.mau2$is_fp <- 1
# all.x = TRUE (not the reassignable shorthand T) keeps unmatched January users
fp.mau1 <- merge(fp.mau1, fp.mau2[, c("user_id", "is_fp")],
                 by = "user_id", all.x = TRUE)
fp.mau1$is_fp[is.na(fp.mau1$is_fp)] <- 0
head(fp.mau1)
## user_id region_month device is_access is_fp
## 1 397286 2013-01 FP 1 1
## 2 471341 2013-01 FP 1 0
## 3 503874 2013-01 FP 0 0
## 4 512250 2013-01 FP 1 1
## 5 513811 2013-01 FP 1 1
## 6 638688 2013-01 FP 1 1
# Did each January FP user use a smartphone (SP) in February?
# Mark every February SP user, left-join that flag onto the January FP users,
# and turn the unmatched (NA) entries into 0.
sp.mau2$is_sp <- 1
# all.x = TRUE (not the reassignable shorthand T) keeps unmatched January users
fp.mau1 <- merge(fp.mau1, sp.mau2[, c("user_id", "is_sp")],
                 by = "user_id", all.x = TRUE)
fp.mau1$is_sp[is.na(fp.mau1$is_sp)] <- 0
head(fp.mau1)
## user_id region_month device is_access is_fp is_sp
## 1 397286 2013-01 FP 1 1 0
## 2 471341 2013-01 FP 1 0 1
## 3 503874 2013-01 FP 0 0 0
## 4 512250 2013-01 FP 1 1 0
## 5 513811 2013-01 FP 1 1 0
## 6 638688 2013-01 FP 1 1 0
# Keep only January FP users who either did not access the app in February
# or used it on a smartphone in February.
left.or.moved <- fp.mau1$is_access == 0 | fp.mau1$is_sp == 1
fp.mau1 <- fp.mau1[left.or.moved, ]
head(fp.mau1)
## user_id region_month device is_access is_fp is_sp
## 2 471341 2013-01 FP 1 0 1
## 3 503874 2013-01 FP 0 0 0
## 11 1073544 2013-01 FP 0 0 0
## 12 1073864 2013-01 FP 0 0 0
## 14 1163733 2013-01 FP 1 0 1
## 15 1454629 2013-01 FP 0 0 0
# Prepare the per-day usage data for January FP users.
library(reshape2)
fp.dau1 <- dau[dau$device == "FP" & dau$region_month == "2013-01", ]
fp.dau1$is_access <- 1
# Pivot to one row per user and one column per calendar day. Each cell is the
# number of log rows for that user/day, stored as a character string, so the
# model below treats every day column as categorical.
# NOTE(review): a count above 1 becomes its own category (e.g. "2"); confirm
# that is intended rather than a plain 0/1 access flag.
fp.dau1.cast <- dcast(fp.dau1, user_id ~ region_day, value.var = "is_access",
                      fun.aggregate = function(x) as.character(length(x)))
# Label each day column from its own date (X1day, X2day, ...) instead of
# assuming exactly 31 columns: a day with no FP log rows would otherwise shift
# or break the labels.
day.cols <- names(fp.dau1.cast)[-1]
day.num <- as.integer(format(as.Date(day.cols), "%d"))
names(fp.dau1.cast)[-1] <- paste0("X", day.num, "day")
head(fp.dau1.cast)
## user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 1 397286 1 1 1 1 1 1 1 1 1 1
## 2 471341 1 1 1 1 0 0 0 0 0 0
## 3 503874 1 0 0 0 0 0 0 0 0 0
## 4 512250 1 1 1 1 1 1 1 1 1 1
## 5 513811 0 0 0 0 0 0 0 0 0 0
## 6 638688 1 1 1 1 1 1 1 1 1 1
## X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 1 1 1 2 1 1 1 2 1 1 1
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 1 1 1 1 1 1 1 1 1 1
## 5 0 0 0 0 0 0 0 0 0 0
## 6 1 1 1 1 1 1 1 1 1 1
## X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 1 1 1 1 1 1 1 1 1 1 1
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 1 1 1 1 1 1 1 1 1 1
## 5 0 1 0 0 0 0 0 1 1 0
## 6 1 1 1 1 1 1 1 1 1 1
## X31day
## 1 1
## 2 0
## 3 0
## 4 1
## 5 1
## 6 1
# Attach the "used a smartphone in February" flag to the daily usage table.
# The join keeps only user_ids present in both tables.
sp.flag <- fp.mau1[, c("user_id", "is_sp")]
fp.dau1.cast <- merge(x = fp.dau1.cast, y = sp.flag, by = "user_id")
head(fp.dau1.cast)
## user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 1 471341 1 1 1 1 0 0 0 0 0 0
## 2 503874 1 0 0 0 0 0 0 0 0 0
## 3 1073544 0 0 0 0 0 0 0 0 0 1
## 4 1073864 0 0 0 0 0 0 0 0 0 0
## 5 1163733 1 1 0 0 0 0 0 0 0 0
## 6 1454629 0 0 0 0 0 0 0 0 0 0
## X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 1 0 0 1 0 0 0 0 0 0
## 5 0 0 0 0 0 0 1 1 0 0
## 6 0 0 1 0 0 0 0 0 0 0
## X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 1 1 1 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 1 1 1 1 1 1 1 1 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## X31day is_sp
## 1 0 1
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 1
## 6 0 0
# Count how many rows fall into each value of the is_sp target
table(fp.dau1.cast[["is_sp"]])
##
## 0 1
## 190 62
# Build the model with logistic regression.
# Start from the full model (is_sp against every day column; the first
# column, user_id, is dropped), then let step() search for a smaller model.
full.logit <- glm(is_sp ~ ., family = binomial, data = fp.dau1.cast[, -1])
fit.logit <- step(full.logit)
## Start: AIC=178.32
## is_sp ~ X1day + X2day + X3day + X4day + X5day + X6day + X7day +
## X8day + X9day + X10day + X11day + X12day + X13day + X14day +
## X15day + X16day + X17day + X18day + X19day + X20day + X21day +
## X22day + X23day + X24day + X25day + X26day + X27day + X28day +
## X29day + X30day + X31day
##
## Df Deviance AIC
## - X23day 2 114.06 176.06
## - X25day 1 112.33 176.33
## - X26day 1 112.36 176.36
## - X28day 1 112.38 176.38
## - X20day 1 112.38 176.38
## - X27day 1 112.39 176.39
## - X2day 1 112.41 176.41
## - X17day 1 112.45 176.45
## - X9day 1 112.46 176.46
## - X3day 1 112.48 176.49
## - X16day 1 112.49 176.49
## - X12day 1 112.50 176.50
## - X19day 1 112.58 176.58
## - X6day 1 112.65 176.65
## - X21day 1 112.90 176.90
## - X8day 1 113.09 177.09
## - X14day 1 113.23 177.23
## - X22day 1 113.43 177.43
## - X11day 1 113.66 177.66
## - X31day 1 113.79 177.79
## - X18day 1 113.81 177.81
## - X15day 1 114.27 178.27
## <none> 112.32 178.32
## - X29day 1 114.55 178.55
## - X13day 1 114.64 178.64
## - X30day 1 114.65 178.65
## - X24day 1 114.78 178.78
## - X5day 1 114.94 178.94
## - X7day 1 115.61 179.61
## - X10day 1 117.86 181.86
## - X4day 1 118.08 182.09
## - X1day 1 118.61 182.61
##
## Step: AIC=176.06
## is_sp ~ X1day + X2day + X3day + X4day + X5day + X6day + X7day +
## X8day + X9day + X10day + X11day + X12day + X13day + X14day +
## X15day + X16day + X17day + X18day + X19day + X20day + X21day +
## X22day + X24day + X25day + X26day + X27day + X28day + X29day +
## X30day + X31day
##
## Df Deviance AIC
## - X26day 1 114.06 174.06
## - X20day 1 114.06 174.06
## - X28day 1 114.07 174.07
## - X2day 1 114.09 174.09
## - X9day 1 114.09 174.09
## - X27day 1 114.09 174.09
## - X16day 1 114.11 174.12
## - X17day 1 114.14 174.15
## - X21day 1 114.17 174.17
## - X25day 1 114.18 174.18
## - X12day 1 114.21 174.21
## - X3day 1 114.38 174.38
## - X19day 1 114.52 174.52
## - X6day 1 114.62 174.62
## - X11day 1 114.81 174.81
## - X14day 1 114.94 174.94
## - X8day 1 115.12 175.12
## - X22day 1 115.23 175.23
## - X5day 1 115.79 175.79
## - X18day 1 115.79 175.79
## - X30day 1 115.82 175.82
## - X29day 1 115.82 175.82
## - X15day 1 115.98 175.98
## <none> 114.06 176.06
## - X24day 1 116.07 176.07
## - X31day 1 116.11 176.11
## - X13day 1 116.32 176.32
## - X7day 1 116.65 176.65
## - X4day 1 119.59 179.59
## - X10day 1 119.61 179.61
## - X1day 1 119.68 179.68
##
## Step: AIC=174.06
## is_sp ~ X1day + X2day + X3day + X4day + X5day + X6day + X7day +
## X8day + X9day + X10day + X11day + X12day + X13day + X14day +
## X15day + X16day + X17day + X18day + X19day + X20day + X21day +
## X22day + X24day + X25day + X27day + X28day + X29day + X30day +
## X31day
##
## Df Deviance AIC
## - X20day 1 114.06 172.06
## - X28day 1 114.07 172.07
## - X2day 1 114.09 172.09
## - X9day 1 114.09 172.09
## - X27day 1 114.09 172.09
## - X16day 1 114.12 172.12
## - X17day 1 114.15 172.15
## - X21day 1 114.17 172.17
## - X25day 1 114.20 172.20
## - X12day 1 114.22 172.22
## - X3day 1 114.38 172.38
## - X19day 1 114.53 172.53
## - X6day 1 114.66 172.66
## - X11day 1 114.81 172.81
## - X14day 1 114.94 172.94
## - X8day 1 115.13 173.13
## - X22day 1 115.23 173.23
## - X30day 1 115.82 173.82
## - X29day 1 115.82 173.82
## - X5day 1 115.84 173.84
## - X18day 1 115.97 173.97
## - X15day 1 115.99 173.99
## <none> 114.06 174.06
## - X31day 1 116.11 174.11
## - X24day 1 116.29 174.29
## - X13day 1 116.36 174.36
## - X7day 1 116.71 174.71
## - X4day 1 119.60 177.60
## - X10day 1 119.62 177.62
## - X1day 1 119.75 177.75
##
## Step: AIC=172.06
## is_sp ~ X1day + X2day + X3day + X4day + X5day + X6day + X7day +
## X8day + X9day + X10day + X11day + X12day + X13day + X14day +
## X15day + X16day + X17day + X18day + X19day + X21day + X22day +
## X24day + X25day + X27day + X28day + X29day + X30day + X31day
##
## Df Deviance AIC
## - X28day 1 114.07 170.07
## - X2day 1 114.09 170.09
## - X9day 1 114.09 170.09
## - X27day 1 114.09 170.09
## - X16day 1 114.12 170.12
## - X17day 1 114.15 170.15
## - X21day 1 114.17 170.17
## - X25day 1 114.20 170.20
## - X12day 1 114.22 170.22
## - X3day 1 114.39 170.39
## - X19day 1 114.65 170.65
## - X6day 1 114.67 170.67
## - X11day 1 114.84 170.84
## - X14day 1 114.97 170.97
## - X8day 1 115.14 171.14
## - X22day 1 115.27 171.27
## - X30day 1 115.84 171.84
## - X5day 1 115.85 171.85
## - X29day 1 115.85 171.85
## <none> 114.06 172.06
## - X15day 1 116.07 172.07
## - X31day 1 116.12 172.12
## - X18day 1 116.12 172.12
## - X24day 1 116.31 172.31
## - X13day 1 116.37 172.37
## - X7day 1 116.72 172.72
## - X4day 1 119.67 175.67
## - X1day 1 119.82 175.82
## - X10day 1 119.98 175.98
##
## Step: AIC=170.07
## is_sp ~ X1day + X2day + X3day + X4day + X5day + X6day + X7day +
## X8day + X9day + X10day + X11day + X12day + X13day + X14day +
## X15day + X16day + X17day + X18day + X19day + X21day + X22day +
## X24day + X25day + X27day + X29day + X30day + X31day
##
## Df Deviance AIC
## - X2day 1 114.10 168.10
## - X9day 1 114.11 168.11
## - X27day 1 114.11 168.11
## - X16day 1 114.13 168.13
## - X17day 1 114.18 168.18
## - X21day 1 114.21 168.21
## - X12day 1 114.25 168.25
## - X25day 1 114.25 168.25
## - X3day 1 114.42 168.42
## - X19day 1 114.65 168.65
## - X6day 1 114.67 168.67
## - X11day 1 114.92 168.92
## - X14day 1 115.04 169.04
## - X8day 1 115.15 169.15
## - X22day 1 115.27 169.27
## - X5day 1 115.95 169.95
## <none> 114.07 170.07
## - X15day 1 116.09 170.09
## - X18day 1 116.12 170.12
## - X29day 1 116.15 170.15
## - X31day 1 116.17 170.17
## - X30day 1 116.21 170.21
## - X24day 1 116.35 170.35
## - X13day 1 116.41 170.41
## - X7day 1 116.75 170.75
## - X4day 1 119.67 173.67
## - X1day 1 119.88 173.88
## - X10day 1 120.12 174.12
##
## Step: AIC=168.1
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day +
## X9day + X10day + X11day + X12day + X13day + X14day + X15day +
## X16day + X17day + X18day + X19day + X21day + X22day + X24day +
## X25day + X27day + X29day + X30day + X31day
##
## Df Deviance AIC
## - X9day 1 114.13 166.13
## - X27day 1 114.14 166.14
## - X16day 1 114.15 166.15
## - X17day 1 114.21 166.21
## - X21day 1 114.22 166.22
## - X25day 1 114.28 166.28
## - X12day 1 114.33 166.33
## - X6day 1 114.68 166.68
## - X19day 1 114.70 166.70
## - X3day 1 114.78 166.78
## - X11day 1 115.00 167.00
## - X14day 1 115.05 167.05
## - X8day 1 115.20 167.20
## - X22day 1 115.29 167.29
## - X5day 1 115.97 167.97
## <none> 114.10 168.10
## - X29day 1 116.19 168.19
## - X15day 1 116.21 168.21
## - X31day 1 116.26 168.26
## - X24day 1 116.35 168.35
## - X18day 1 116.37 168.37
## - X30day 1 116.48 168.48
## - X13day 1 116.55 168.55
## - X7day 1 116.75 168.75
## - X10day 1 120.14 172.13
## - X4day 1 120.32 172.32
## - X1day 1 122.03 174.03
##
## Step: AIC=166.13
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day +
## X10day + X11day + X12day + X13day + X14day + X15day + X16day +
## X17day + X18day + X19day + X21day + X22day + X24day + X25day +
## X27day + X29day + X30day + X31day
##
## Df Deviance AIC
## - X16day 1 114.17 164.17
## - X27day 1 114.17 164.17
## - X17day 1 114.25 164.25
## - X21day 1 114.25 164.25
## - X25day 1 114.31 164.31
## - X12day 1 114.38 164.38
## - X6day 1 114.69 164.69
## - X19day 1 114.71 164.71
## - X3day 1 114.84 164.84
## - X11day 1 115.05 165.05
## - X14day 1 115.11 165.11
## - X8day 1 115.24 165.24
## - X22day 1 115.32 165.32
## <none> 114.13 166.13
## - X5day 1 116.19 166.19
## - X29day 1 116.20 166.21
## - X15day 1 116.28 166.28
## - X31day 1 116.31 166.31
## - X24day 1 116.38 166.38
## - X18day 1 116.38 166.38
## - X30day 1 116.55 166.54
## - X13day 1 116.57 166.57
## - X7day 1 116.75 166.75
## - X4day 1 120.40 170.40
## - X10day 1 120.41 170.41
## - X1day 1 122.37 172.37
##
## Step: AIC=164.17
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day +
## X10day + X11day + X12day + X13day + X14day + X15day + X17day +
## X18day + X19day + X21day + X22day + X24day + X25day + X27day +
## X29day + X30day + X31day
##
## Df Deviance AIC
## - X27day 1 114.21 162.21
## - X17day 1 114.27 162.27
## - X21day 1 114.31 162.31
## - X25day 1 114.37 162.37
## - X12day 1 114.42 162.42
## - X19day 1 114.71 162.71
## - X6day 1 114.71 162.71
## - X3day 1 114.87 162.87
## - X11day 1 115.05 163.05
## - X14day 1 115.20 163.20
## - X22day 1 115.32 163.32
## - X8day 1 115.45 163.45
## <none> 114.17 164.17
## - X5day 1 116.19 164.19
## - X29day 1 116.33 164.32
## - X15day 1 116.33 164.33
## - X18day 1 116.43 164.43
## - X31day 1 116.46 164.46
## - X24day 1 116.52 164.52
## - X13day 1 116.58 164.58
## - X30day 1 116.67 164.67
## - X7day 1 116.77 164.77
## - X10day 1 120.42 168.42
## - X4day 1 120.50 168.50
## - X1day 1 122.44 170.44
##
## Step: AIC=162.21
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day +
## X10day + X11day + X12day + X13day + X14day + X15day + X17day +
## X18day + X19day + X21day + X22day + X24day + X25day + X29day +
## X30day + X31day
##
## Df Deviance AIC
## - X17day 1 114.31 160.31
## - X25day 1 114.37 160.37
## - X21day 1 114.43 160.43
## - X12day 1 114.46 160.46
## - X6day 1 114.77 160.77
## - X19day 1 114.87 160.87
## - X3day 1 114.93 160.93
## - X11day 1 115.05 161.05
## - X14day 1 115.34 161.34
## - X22day 1 115.34 161.34
## - X8day 1 115.67 161.67
## - X5day 1 116.20 162.20
## <none> 114.21 162.21
## - X15day 1 116.33 162.33
## - X18day 1 116.48 162.48
## - X31day 1 116.48 162.48
## - X29day 1 116.50 162.50
## - X24day 1 116.63 162.63
## - X7day 1 116.81 162.81
## - X30day 1 116.94 162.94
## - X13day 1 117.03 163.03
## - X4day 1 120.55 166.54
## - X10day 1 120.65 166.65
## - X1day 1 122.53 168.53
##
## Step: AIC=160.31
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day +
## X10day + X11day + X12day + X13day + X14day + X15day + X18day +
## X19day + X21day + X22day + X24day + X25day + X29day + X30day +
## X31day
##
## Df Deviance AIC
## - X25day 1 114.45 158.45
## - X21day 1 114.48 158.48
## - X12day 1 114.63 158.63
## - X6day 1 114.82 158.82
## - X19day 1 114.92 158.92
## - X3day 1 114.96 158.96
## - X11day 1 115.25 159.25
## - X22day 1 115.41 159.41
## - X14day 1 115.66 159.66
## - X8day 1 115.77 159.77
## - X5day 1 116.20 160.20
## <none> 114.31 160.31
## - X18day 1 116.48 160.48
## - X31day 1 116.64 160.64
## - X24day 1 116.67 160.67
## - X15day 1 116.73 160.73
## - X29day 1 116.86 160.86
## - X7day 1 116.96 160.96
## - X30day 1 117.06 161.06
## - X13day 1 117.12 161.12
## - X4day 1 120.56 164.56
## - X10day 1 121.11 165.11
## - X1day 1 122.67 166.67
##
## Step: AIC=158.45
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day +
## X10day + X11day + X12day + X13day + X14day + X15day + X18day +
## X19day + X21day + X22day + X24day + X29day + X30day + X31day
##
## Df Deviance AIC
## - X21day 1 114.58 156.58
## - X12day 1 114.86 156.86
## - X6day 1 114.95 156.95
## - X3day 1 115.07 157.07
## - X19day 1 115.15 157.15
## - X22day 1 115.47 157.47
## - X11day 1 115.48 157.48
## - X14day 1 115.80 157.79
## - X8day 1 115.90 157.90
## <none> 114.45 158.45
## - X5day 1 116.48 158.48
## - X18day 1 116.64 158.64
## - X29day 1 116.86 158.86
## - X24day 1 116.90 158.90
## - X31day 1 116.92 158.92
## - X15day 1 116.93 158.93
## - X7day 1 117.02 159.02
## - X30day 1 117.11 159.11
## - X13day 1 117.44 159.44
## - X4day 1 120.67 162.67
## - X10day 1 121.13 163.13
## - X1day 1 123.01 165.01
##
## Step: AIC=156.58
## is_sp ~ X1day + X3day + X4day + X5day + X6day + X7day + X8day +
## X10day + X11day + X12day + X13day + X14day + X15day + X18day +
## X19day + X22day + X24day + X29day + X30day + X31day
##
## Df Deviance AIC
## - X6day 1 115.02 155.02
## - X12day 1 115.07 155.07
## - X3day 1 115.13 155.13
## - X19day 1 115.25 155.25
## - X14day 1 115.94 155.94
## - X8day 1 116.09 156.09
## - X11day 1 116.27 156.27
## <none> 114.58 156.58
## - X22day 1 116.64 156.65
## - X5day 1 116.70 156.70
## - X31day 1 117.05 157.05
## - X30day 1 117.13 157.13
## - X15day 1 117.13 157.13
## - X18day 1 117.19 157.19
## - X24day 1 117.22 157.22
## - X7day 1 117.31 157.31
## - X13day 1 117.68 157.68
## - X29day 1 117.92 157.92
## - X4day 1 120.86 160.86
## - X10day 1 122.30 162.30
## - X1day 1 123.01 163.01
##
## Step: AIC=155.02
## is_sp ~ X1day + X3day + X4day + X5day + X7day + X8day + X10day +
## X11day + X12day + X13day + X14day + X15day + X18day + X19day +
## X22day + X24day + X29day + X30day + X31day
##
## Df Deviance AIC
## - X3day 1 115.43 153.43
## - X19day 1 115.56 153.56
## - X12day 1 115.73 153.73
## - X14day 1 116.47 154.47
## - X8day 1 116.51 154.51
## - X11day 1 116.69 154.69
## <none> 115.02 155.02
## - X31day 1 117.32 155.32
## - X18day 1 117.37 155.37
## - X30day 1 117.49 155.49
## - X24day 1 117.52 155.51
## - X7day 1 117.54 155.54
## - X15day 1 117.60 155.60
## - X22day 1 117.71 155.71
## - X13day 1 117.96 155.96
## - X29day 1 118.08 156.08
## - X5day 1 118.17 156.17
## - X4day 1 121.06 159.06
## - X10day 1 122.93 160.93
## - X1day 1 123.25 161.25
##
## Step: AIC=153.43
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X11day +
## X12day + X13day + X14day + X15day + X18day + X19day + X22day +
## X24day + X29day + X30day + X31day
##
## Df Deviance AIC
## - X19day 1 116.02 152.01
## - X12day 1 116.07 152.07
## - X11day 1 116.91 152.91
## - X14day 1 116.94 152.94
## - X8day 1 117.18 153.18
## <none> 115.43 153.43
## - X18day 1 117.59 153.59
## - X15day 1 117.73 153.74
## - X24day 1 117.74 153.74
## - X31day 1 117.79 153.79
## - X7day 1 117.80 153.80
## - X30day 1 117.99 153.99
## - X13day 1 118.05 154.05
## - X5day 1 118.27 154.27
## - X22day 1 118.28 154.28
## - X29day 1 118.30 154.29
## - X10day 1 122.94 158.94
## - X4day 1 123.68 159.68
## - X1day 1 124.50 160.50
##
## Step: AIC=152.02
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X11day +
## X12day + X13day + X14day + X15day + X18day + X22day + X24day +
## X29day + X30day + X31day
##
## Df Deviance AIC
## - X12day 1 116.80 150.80
## - X11day 1 117.41 151.41
## - X14day 1 117.77 151.76
## - X18day 1 117.77 151.77
## - X8day 1 117.85 151.85
## <none> 116.02 152.01
## - X24day 1 118.14 152.13
## - X15day 1 118.22 152.22
## - X22day 1 118.28 152.28
## - X30day 1 118.33 152.32
## - X7day 1 118.55 152.55
## - X31day 1 118.79 152.79
## - X5day 1 118.90 152.90
## - X29day 1 118.91 152.91
## - X13day 1 119.11 153.12
## - X4day 1 124.25 158.25
## - X10day 1 124.52 158.52
## - X1day 1 125.22 159.22
##
## Step: AIC=150.8
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X11day +
## X13day + X14day + X15day + X18day + X22day + X24day + X29day +
## X30day + X31day
##
## Df Deviance AIC
## - X11day 1 117.63 149.63
## - X18day 1 118.53 150.53
## - X24day 1 118.73 150.73
## - X8day 1 118.74 150.74
## - X14day 1 118.79 150.79
## <none> 116.80 150.80
## - X22day 1 118.81 150.81
## - X15day 1 118.88 150.88
## - X30day 1 118.99 150.99
## - X13day 1 119.19 151.19
## - X7day 1 119.31 151.31
## - X31day 1 119.39 151.40
## - X5day 1 119.63 151.63
## - X29day 1 121.21 153.21
## - X4day 1 124.37 156.37
## - X1day 1 125.70 157.70
## - X10day 1 127.57 159.57
##
## Step: AIC=149.63
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X13day +
## X14day + X15day + X18day + X22day + X24day + X29day + X30day +
## X31day
##
## Df Deviance AIC
## - X15day 1 119.28 149.28
## - X24day 1 119.31 149.31
## - X22day 1 119.36 149.36
## - X8day 1 119.37 149.37
## <none> 117.63 149.63
## - X14day 1 119.63 149.63
## - X30day 1 119.67 149.67
## - X18day 1 119.71 149.71
## - X31day 1 120.08 150.08
## - X7day 1 120.28 150.28
## - X5day 1 120.83 150.83
## - X13day 1 121.75 151.75
## - X29day 1 122.39 152.39
## - X1day 1 126.29 156.29
## - X4day 1 127.14 157.15
## - X10day 1 127.64 157.64
##
## Step: AIC=149.28
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X13day +
## X14day + X18day + X22day + X24day + X29day + X30day + X31day
##
## Df Deviance AIC
## - X18day 1 120.30 148.30
## - X24day 1 120.66 148.66
## - X8day 1 120.84 148.84
## - X30day 1 121.01 149.01
## - X22day 1 121.06 149.06
## - X31day 1 121.23 149.23
## <none> 119.28 149.28
## - X14day 1 122.22 150.22
## - X13day 1 122.39 150.39
## - X5day 1 122.49 150.49
## - X7day 1 122.76 150.76
## - X29day 1 123.87 151.87
## - X1day 1 127.25 155.25
## - X4day 1 128.08 156.08
## - X10day 1 129.83 157.83
##
## Step: AIC=148.3
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X13day +
## X14day + X22day + X24day + X29day + X30day + X31day
##
## Df Deviance AIC
## - X30day 1 121.86 147.85
## - X8day 1 121.86 147.86
## - X31day 1 122.00 148.00
## <none> 120.30 148.30
## - X24day 1 122.42 148.42
## - X14day 1 122.52 148.52
## - X22day 1 123.03 149.03
## - X13day 1 123.18 149.18
## - X7day 1 123.27 149.27
## - X5day 1 123.50 149.50
## - X29day 1 124.96 150.96
## - X1day 1 128.52 154.52
## - X4day 1 128.64 154.64
## - X10day 1 129.85 155.85
##
## Step: AIC=147.86
## is_sp ~ X1day + X4day + X5day + X7day + X8day + X10day + X13day +
## X14day + X22day + X24day + X29day + X31day
##
## Df Deviance AIC
## - X8day 1 123.47 147.47
## - X14day 1 123.59 147.59
## <none> 121.86 147.85
## - X24day 1 124.11 148.11
## - X22day 1 124.24 148.24
## - X13day 1 124.29 148.29
## - X5day 1 125.14 149.14
## - X7day 1 125.45 149.45
## - X31day 1 126.11 150.11
## - X29day 1 129.25 153.25
## - X4day 1 129.93 153.93
## - X1day 1 130.61 154.61
## - X10day 1 132.07 156.07
##
## Step: AIC=147.47
## is_sp ~ X1day + X4day + X5day + X7day + X10day + X13day + X14day +
## X22day + X24day + X29day + X31day
##
## Df Deviance AIC
## - X14day 1 125.30 147.30
## <none> 123.47 147.47
## - X24day 1 125.88 147.88
## - X22day 1 126.33 148.33
## - X5day 1 126.43 148.43
## - X31day 1 127.43 149.43
## - X13day 1 127.66 149.66
## - X7day 1 130.74 152.74
## - X1day 1 130.91 152.91
## - X4day 1 131.28 153.28
## - X10day 1 132.11 154.11
## - X29day 1 132.28 154.28
##
## Step: AIC=147.3
## is_sp ~ X1day + X4day + X5day + X7day + X10day + X13day + X22day +
## X24day + X29day + X31day
##
## Df Deviance AIC
## - X24day 1 126.73 146.73
## <none> 125.30 147.30
## - X5day 1 127.41 147.41
## - X13day 1 128.18 148.18
## - X22day 1 128.21 148.21
## - X31day 1 129.21 149.21
## - X7day 1 131.12 151.12
## - X1day 1 131.85 151.85
## - X4day 1 132.76 152.76
## - X29day 1 133.76 153.76
## - X10day 1 135.00 155.00
##
## Step: AIC=146.73
## is_sp ~ X1day + X4day + X5day + X7day + X10day + X13day + X22day +
## X29day + X31day
##
## Df Deviance AIC
## <none> 126.73 146.73
## - X5day 1 128.74 146.74
## - X13day 1 130.04 148.04
## - X31day 1 130.92 148.92
## - X7day 1 132.04 150.04
## - X22day 1 132.97 150.97
## - X1day 1 133.86 151.86
## - X4day 1 134.08 152.08
## - X29day 1 135.86 153.86
## - X10day 1 136.13 154.13
# Show the summary of the model selected by step()
summary(object = fit.logit)
##
## Call:
## glm(formula = is_sp ~ X1day + X4day + X5day + X7day + X10day +
## X13day + X22day + X29day + X31day, family = binomial, data = fp.dau1.cast[,
## -1])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.95538 -0.45175 -0.23178 -0.06122 2.69461
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.6036 0.4269 -8.441 < 2e-16 ***
## X1day1 1.5334 0.5720 2.681 0.00735 **
## X4day1 1.7753 0.6424 2.764 0.00572 **
## X5day1 -1.0353 0.7622 -1.358 0.17437
## X7day1 1.7002 0.7109 2.392 0.01678 *
## X10day1 -2.6753 0.9418 -2.841 0.00450 **
## X13day1 1.3726 0.7547 1.819 0.06893 .
## X22day1 1.6233 0.6382 2.543 0.01098 *
## X29day1 2.0012 0.6480 3.088 0.00201 **
## X31day1 1.7310 0.8143 2.126 0.03352 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 281.20 on 251 degrees of freedom
## Residual deviance: 126.73 on 242 degrees of freedom
## AIC: 146.73
##
## Number of Fisher Scoring iterations: 6
# Draw the model's diagnostic plots on a 2x2 grid. par() returns the previous
# settings, which are restored afterwards so later plots are not left in the
# 2x2 layout.
old.par <- par(mfrow = c(2, 2))
plot(fit.logit)
par(old.par)
# Predict with the fitted model.
# Fitted probability of migrating to SP (smartphone), kept unrounded here
sp.prob <- fitted(fit.logit)
# Displayed probability, rounded to two decimals
fp.dau1.cast$prob <- round(sp.prob, 2)
# Predicted migration flag. The 0.5 cut-off is applied to the unrounded
# probability: thresholding the rounded value would turn a fitted value just
# above 0.5 (e.g. 0.503 -> 0.50) into a prediction of 0.
fp.dau1.cast$pred <- ifelse(sp.prob > 0.5, 1, 0)
head(fp.dau1.cast)
## user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 1 471341 1 1 1 1 0 0 0 0 0 0
## 2 503874 1 0 0 0 0 0 0 0 0 0
## 3 1073544 0 0 0 0 0 0 0 0 0 1
## 4 1073864 0 0 0 0 0 0 0 0 0 0
## 5 1163733 1 1 0 0 0 0 0 0 0 0
## 6 1454629 0 0 0 0 0 0 0 0 0 0
## X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 0 0 0 0 0 0 0 0
## 4 1 0 0 1 0 0 0 0 0 0
## 5 0 0 0 0 0 0 1 1 0 0
## 6 0 0 1 0 0 0 0 0 0 0
## X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 1 0 0 0 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0 0 0 0
## 3 0 0 1 1 1 0 0 0 0 0
## 4 0 0 0 0 0 0 0 0 0 0
## 5 1 1 1 1 1 1 1 1 0 0
## 6 0 0 0 0 0 0 0 0 0 0
## X31day is_sp prob pred
## 1 0 1 0.43 0
## 2 0 0 0.11 0
## 3 0 0 0.00 0
## 4 0 0 0.03 0
## 5 0 1 0.39 0
## 6 0 0 0.10 0
# Predicted versus actual: cross-tabulate the true flag against the prediction
with(fp.dau1.cast, table(is_sp, pred))
## pred
## is_sp 0 1
## 0 180 10
## 1 20 42
# Infer user groups from the prediction results.
# Group 1: users who migrated to SP and were predicted to migrate, listed from
# the highest fitted probability down (decreasing = TRUE rather than the
# reassignable shorthand T).
fp.dau1.cast1 <- fp.dau1.cast[fp.dau1.cast$is_sp == 1 & fp.dau1.cast$pred == 1, ]
head(fp.dau1.cast1[order(fp.dau1.cast1$prob, decreasing = TRUE), ])
## user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 137 24791702 1 1 0 1 0 1 1 1 1 1
## 138 24791702 1 1 0 1 0 1 1 1 1 1
## 22 5526146 1 1 1 1 1 1 1 1 1 1
## 44 9567562 1 1 1 1 1 1 1 1 1 1
## 45 9567562 1 1 1 1 1 1 1 1 1 1
## 86 16557842 1 1 1 1 1 1 1 1 1 1
## X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 137 1 1 1 1 1 1 1 1 0 1
## 138 1 1 1 1 1 1 1 1 0 1
## 22 1 1 1 1 1 1 1 1 1 1
## 44 1 1 1 1 1 1 1 1 0 1
## 45 1 1 1 1 1 1 1 1 0 1
## 86 1 1 1 1 1 1 1 1 1 1
## X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 137 1 1 1 1 1 1 1 1 1 1
## 138 1 1 1 1 1 1 1 1 1 1
## 22 1 1 1 1 1 1 1 1 1 1
## 44 1 1 1 1 1 1 1 1 1 1
## 45 1 1 1 1 1 1 1 1 1 1
## 86 1 1 1 1 1 1 1 1 1 1
## X31day is_sp prob pred
## 137 1 1 1.00 1
## 138 1 1 1.00 1
## 22 1 1 0.99 1
## 44 1 1 0.99 1
## 45 1 1 0.99 1
## 86 1 1 0.99 1
# Group 2: users who did not migrate to SP but were predicted to migrate,
# listed from the highest fitted probability down (decreasing = TRUE rather
# than the reassignable shorthand T).
fp.dau1.cast2 <- fp.dau1.cast[fp.dau1.cast$is_sp == 0 & fp.dau1.cast$pred == 1, ]
head(fp.dau1.cast2[order(fp.dau1.cast2$prob, decreasing = TRUE), ])
## user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 109 19432099 1 1 1 1 0 1 1 1 1 1
## 195 41590801 0 0 0 0 0 0 0 0 0 0
## 204 43451947 1 1 1 1 1 0 1 1 1 1
## 198 42276142 1 1 1 1 1 1 0 1 1 1
## 28 6147878 1 0 0 1 1 1 1 1 1 1
## 210 46285446 0 0 0 0 1 1 1 1 1 0
## X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 109 1 1 1 1 0 1 1 0 1 1
## 195 0 0 0 0 0 0 0 0 0 0
## 204 0 0 0 0 0 0 0 0 1 0
## 198 0 1 1 0 1 1 1 1 1 1
## 28 1 1 1 1 1 1 1 1 1 1
## 210 0 0 1 1 0 1 0 0 0 1
## X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 109 1 1 1 0 0 0 0 0 0 0
## 195 1 1 0 0 0 0 0 0 1 0
## 204 0 1 0 0 1 0 0 1 1 0
## 198 1 1 1 1 1 1 1 1 1 0
## 28 1 1 1 1 1 1 0 0 0 0
## 210 1 0 1 1 1 1 1 0 1 0
## X31day is_sp prob pred
## 109 0 0 0.85 1
## 195 1 0 0.85 1
## 204 0 0 0.79 1
## 198 0 0 0.73 1
## 28 0 0 0.67 1
## 210 0 0 0.61 1
# Group 3: users who did not migrate to SP and were predicted not to migrate,
# listed from the lowest fitted probability up.
stayed.as.predicted <- fp.dau1.cast$is_sp == 0 & fp.dau1.cast$pred == 0
fp.dau1.cast3 <- fp.dau1.cast[stayed.as.predicted, ]
asc.prob <- order(fp.dau1.cast3$prob)
head(fp.dau1.cast3[asc.prob, ])
## user_id X1day X2day X3day X4day X5day X6day X7day X8day X9day X10day
## 3 1073544 0 0 0 0 0 0 0 0 0 1
## 11 2541741 0 0 0 0 0 0 0 0 0 1
## 150 27249550 0 0 0 1 1 1 0 0 0 1
## 243 60725457 0 0 0 0 0 0 0 0 0 1
## 71 13967453 0 0 0 0 1 0 0 0 0 0
## 88 16601600 0 0 0 0 1 0 0 0 0 0
## X11day X12day X13day X14day X15day X16day X17day X18day X19day X20day
## 3 0 0 0 0 0 0 0 0 0 0
## 11 0 0 0 0 0 0 0 0 0 0
## 150 0 1 0 0 0 0 0 0 0 0
## 243 0 0 0 0 0 0 0 0 0 0
## 71 0 0 0 0 0 0 0 0 0 0
## 88 0 0 0 0 0 0 0 0 0 0
## X21day X22day X23day X24day X25day X26day X27day X28day X29day X30day
## 3 0 0 1 1 1 0 0 0 0 0
## 11 0 0 0 0 0 0 0 0 0 0
## 150 0 0 0 0 0 0 0 0 0 0
## 243 0 0 0 0 0 0 0 0 0 0
## 71 0 0 0 0 0 0 0 0 0 0
## 88 0 0 0 0 0 0 0 0 0 0
## X31day is_sp prob pred
## 3 0 0 0.00 0
## 11 0 0 0.00 0
## 150 0 0 0.00 0
## 243 0 0 0.00 0
## 71 0 0 0.01 0
## 88 0 0 0.01 0
# Show the structure (column names and types) of group 3
str(object = fp.dau1.cast3)
## 'data.frame': 180 obs. of 35 variables:
## $ user_id: int 503874 1073544 1073864 1454629 1557628 2313236 2477685 2541741 2628661 3955950 ...
## $ X1day : chr "1" "0" "0" "0" ...
## $ X2day : chr "0" "0" "0" "0" ...
## $ X3day : chr "0" "0" "0" "0" ...
## $ X4day : chr "0" "0" "0" "0" ...
## $ X5day : chr "0" "0" "0" "0" ...
## $ X6day : chr "0" "0" "0" "0" ...
## $ X7day : chr "0" "0" "0" "0" ...
## $ X8day : chr "0" "0" "0" "0" ...
## $ X9day : chr "0" "0" "0" "0" ...
## $ X10day : chr "0" "1" "0" "0" ...
## $ X11day : chr "0" "0" "1" "0" ...
## $ X12day : chr "0" "0" "0" "0" ...
## $ X13day : chr "0" "0" "0" "1" ...
## $ X14day : chr "0" "0" "1" "0" ...
## $ X15day : chr "0" "0" "0" "0" ...
## $ X16day : chr "0" "0" "0" "0" ...
## $ X17day : chr "0" "0" "0" "0" ...
## $ X18day : chr "0" "0" "0" "0" ...
## $ X19day : chr "0" "0" "0" "0" ...
## $ X20day : chr "0" "0" "0" "0" ...
## $ X21day : chr "0" "0" "0" "0" ...
## $ X22day : chr "0" "0" "0" "0" ...
## $ X23day : chr "0" "1" "0" "0" ...
## $ X24day : chr "0" "1" "0" "0" ...
## $ X25day : chr "0" "1" "0" "0" ...
## $ X26day : chr "0" "0" "0" "0" ...
## $ X27day : chr "0" "0" "0" "0" ...
## $ X28day : chr "0" "0" "0" "0" ...
## $ X29day : chr "0" "0" "0" "0" ...
## $ X30day : chr "0" "0" "0" "0" ...
## $ X31day : chr "0" "0" "0" "0" ...
## $ is_sp : num 0 0 0 0 0 0 0 0 0 0 ...
## $ prob : num 0.11 0 0.03 0.1 0.13 0.03 0.03 0 0.03 0.43 ...
## $ pred : num 0 0 0 0 0 0 0 0 0 0 ...
# Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
# When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).