Brian Abelson | News Analytics | 6/12/2013
barplot(table(e$max_chapter), col = "darkblue", border = "white", main = "Furthest chapter reached per visit",
xlab = "Chapter (0 = no chapter event fired)", ylab = "Number of visits")
t <- table(e$max_chapter)
for (i in length(t):1) {
if (i == length(t)) {
t[i] <- t[i]
} else {
t[i] <- t[i] + t[i + 1]
}
}
t <- (t/t[1]) * 100
plot(t, type = "b", pch = 20, col = "darkblue", bty = "n", xlab = "Chapter (0 = no chapter event fired)",
ylab = "Percentage of visitors", main = "Percentage of visitors that reached each chapter")
t <- table(e$ismobile, e$max_chapter)
tt <- table(e$istablet, e$max_chapter)
t[1, ] <- t[1, ]/sum(t[1, ])
t[2, ] <- t[2, ]/sum(t[2, ])
t <- rbind(tt[2, ]/sum(tt[2, ]), t)
barplot(t, beside = TRUE, legend.text = c("Tablet", "Computer", "Mobile"), col = brewer.pal(3,
"Blues"), border = "grey50", xlab = "Chapter (0 = no chapter event fired)",
ylab = "Percentage of visitors", main = "Percentage of visitors that reached each chapter by device")
tt <- table(e$max_chapter, e$bucket_hour)
for (i in 1:ncol(tt)) {
tt[, i] <- as.numeric(tt[, i])/sum(tt[, i])
}
barplot(tt, beside = FALSE, col = brewer.pal(6, "Blues"), main = "Percentage of visitors by max chapter reached by time",
xlab = "Hour", ylab = "Percentage of visitors", border = "grey50", legend.text = c(0:5))
tt <- as.matrix(table(e$max_chapter, e$hour))
barplot(tt, beside = FALSE, col = brewer.pal(6, "Blues"), legend.text = c(0:5),
main = "Total number of visitors by max chapter reached by hour of the day",
xlab = "Hour", ylab = "Visitors", border = "grey50")
m <- glm(save ~ n_events + max_chapter + total_time + reached_comments + nav +
pager + play + +android + firefox + safari + ismobile + android + ie + n_segment,
data = e[sample(1:nrow(e), 500000), ], family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(m)
##
## Call:
## glm(formula = save ~ n_events + max_chapter + total_time + reached_comments +
## nav + pager + play + +android + firefox + safari + ismobile +
## android + ie + n_segment, family = "binomial", data = e[sample(1:nrow(e),
## 500000), ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.490 -0.122 -0.095 -0.079 3.787
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.74389462 0.04287907 -133.96 < 0.0000000000000002 ***
## n_events 0.27586194 0.00280025 98.51 < 0.0000000000000002 ***
## max_chapter 0.05890574 0.01693603 3.48 0.0005 ***
## total_time -0.00000484 0.00000111 -4.37 0.0000123374403573 ***
## reached_comments -0.39912331 0.05574710 -7.16 0.0000000000008095 ***
## nav 0.41550338 0.07050140 5.89 0.0000000037799093 ***
## pager -2.05715326 0.08105709 -25.38 < 0.0000000000000002 ***
## play 0.11306747 0.03450038 3.28 0.0010 **
## android 0.24267475 0.18454606 1.31 0.1885
## firefox 0.12748534 0.03902157 3.27 0.0011 **
## safari 0.18386758 0.03728574 4.93 0.0000008167965239 ***
## ismobile -1.94445555 0.11504072 -16.90 < 0.0000000000000002 ***
## ie -0.35056949 0.04520711 -7.75 0.0000000000000089 ***
## n_segment -0.26957349 0.03283085 -8.21 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 73352 on 499831 degrees of freedom
## Residual deviance: 47212 on 499818 degrees of freedom
## (168 observations deleted due to missingness)
## AIC: 47240
##
## Number of Fisher Scoring iterations: 9
pretty_coefs(m, title = "What predicts commenting?", null = 0)
code removed
ep_save <- e$event_path[e$save > 0]
g_save <- gen_graph(ep_save[!is.na(ep_save)])
ep_non <- e$event_path[e$save == 0]
ep_non <- ep_non[sample(1:length(ep_non), length(ep_save))]
ep_non <- ep_non[!is.na(ep_non) & ep_non != "exit"]
g_non <- gen_graph(ep_non)
ep_all <- e$event_path[!is.na(e$event_path)]
ep_all <- ep_all[sample(1:length(ep_all), length(ep_save))]
ep_all <- ep_all[!is.na(ep_all)]
ep_all <- ep_all[ep_all != "exit"]
g_all <- gen_graph(ep_all)
g_all <- delete.edges(g_all, which(E(g_all)$Weight < 50))
plot_graph(g_all)
g_save <- delete.edges(g_save, which(E(g_save)$Weight < 100))
plot_graph(g_save)
g_non <- delete.edges(g_non, which(E(g_non)$Weight < 50))
plot_graph(g_non)