Procedures Analysis

Brian Abelson | News Analytics | 6/12/2013

SETUP

Init packages, read in data

Cleaning

Exploratory Analysis

Chapter Depth

barplot(table(e$max_chapter), col = "darkblue", border = "white", main = "Furthest chapter reached per visit", 
    xlab = "Chapter (0 = no chapter event fired)", ylab = "Number of visits")

plot of chunk unnamed-chunk-3

Another View…

t <- table(e$max_chapter)
for (i in length(t):1) {
    if (i == length(t)) {
        t[i] <- t[i]
    } else {
        t[i] <- t[i] + t[i + 1]
    }
}
t <- (t/t[1]) * 100
plot(t, type = "b", pch = 20, col = "darkblue", bty = "n", xlab = "Chapter (0 = no chapter event fired)", 
    ylab = "Percentage of visitors", main = "Percentage of visitors that reached each chapter")

plot of chunk unnamed-chunk-4

Computer vs. Mobile vs. Tablet

t <- table(e$ismobile, e$max_chapter)
tt <- table(e$istablet, e$max_chapter)
t[1, ] <- t[1, ]/sum(t[1, ])
t[2, ] <- t[2, ]/sum(t[2, ])
t <- rbind(tt[2, ]/sum(tt[2, ]), t)
barplot(t, beside = TRUE, legend.text = c("Tablet", "Computer", "Mobile"), col = brewer.pal(3, 
    "Blues"), border = "grey50", xlab = "Chapter (0 = no chapter event fired)", 
    ylab = "Percentage of visitors", main = "Percentage of visitors that reached each chapter by device")

plot of chunk unnamed-chunk-5

Depth over time

tt <- table(e$max_chapter, e$bucket_hour)
for (i in 1:ncol(tt)) {
    tt[, i] <- as.numeric(tt[, i])/sum(tt[, i])
}
barplot(tt, beside = FALSE, col = brewer.pal(6, "Blues"), main = "Percentage of visitors by max chapter reached by time", 
    xlab = "Hour", ylab = "Percentage of visitors", border = "grey50", legend.text = c(0:5))

plot of chunk unnamed-chunk-6

Max Chapter by Hour

tt <- as.matrix(table(e$max_chapter, e$hour))
barplot(tt, beside = FALSE, col = brewer.pal(6, "Blues"), legend.text = c(0:5), 
    main = "Total number of visitors by max chapter reached by hour of the day", 
    xlab = "Hour", ylab = "Visitors", border = "grey50")

plot of chunk unnamed-chunk-7

plot of chunk unnamed-chunk-8

Top Referrers

plot of chunk unnamed-chunk-9

Action Analysis

Logisitc model of commenting

m <- glm(save ~ n_events + max_chapter + total_time + reached_comments + nav + 
    pager + play + +android + firefox + safari + ismobile + android + ie + n_segment, 
    data = e[sample(1:nrow(e), 500000), ], family = "binomial")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(m)
## 
## Call:
## glm(formula = save ~ n_events + max_chapter + total_time + reached_comments + 
##     nav + pager + play + +android + firefox + safari + ismobile + 
##     android + ie + n_segment, family = "binomial", data = e[sample(1:nrow(e), 
##     500000), ])
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -8.490  -0.122  -0.095  -0.079   3.787  
## 
## Coefficients:
##                     Estimate  Std. Error z value             Pr(>|z|)    
## (Intercept)      -5.74389462  0.04287907 -133.96 < 0.0000000000000002 ***
## n_events          0.27586194  0.00280025   98.51 < 0.0000000000000002 ***
## max_chapter       0.05890574  0.01693603    3.48               0.0005 ***
## total_time       -0.00000484  0.00000111   -4.37   0.0000123374403573 ***
## reached_comments -0.39912331  0.05574710   -7.16   0.0000000000008095 ***
## nav               0.41550338  0.07050140    5.89   0.0000000037799093 ***
## pager            -2.05715326  0.08105709  -25.38 < 0.0000000000000002 ***
## play              0.11306747  0.03450038    3.28               0.0010 ** 
## android           0.24267475  0.18454606    1.31               0.1885    
## firefox           0.12748534  0.03902157    3.27               0.0011 ** 
## safari            0.18386758  0.03728574    4.93   0.0000008167965239 ***
## ismobile         -1.94445555  0.11504072  -16.90 < 0.0000000000000002 ***
## ie               -0.35056949  0.04520711   -7.75   0.0000000000000089 ***
## n_segment        -0.26957349  0.03283085   -8.21 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 73352  on 499831  degrees of freedom
## Residual deviance: 47212  on 499818  degrees of freedom
##   (168 observations deleted due to missingness)
## AIC: 47240
## 
## Number of Fisher Scoring iterations: 9
pretty_coefs(m, title = "What predicts commenting?", null = 0)

plot of chunk unnamed-chunk-11

Generate Event Graphs

Functions

code removed

Graph Analysis

Create and filter graphs

ep_save <- e$event_path[e$save > 0]
g_save <- gen_graph(ep_save[!is.na(ep_save)])

ep_non <- e$event_path[e$save == 0]
ep_non <- ep_non[sample(1:length(ep_non), length(ep_save))]
ep_non <- ep_non[!is.na(ep_non) & ep_non != "exit"]
g_non <- gen_graph(ep_non)

ep_all <- e$event_path[!is.na(e$event_path)]
ep_all <- ep_all[sample(1:length(ep_all), length(ep_save))]
ep_all <- ep_all[!is.na(ep_all)]
ep_all <- ep_all[ep_all != "exit"]
g_all <- gen_graph(ep_all)

Graph of all visitors

g_all <- delete.edges(g_all, which(E(g_all)$Weight < 50))
plot_graph(g_all)

plot of chunk unnamed-chunk-14

Graph of commenters


g_save <- delete.edges(g_save, which(E(g_save)$Weight < 100))
plot_graph(g_save)

plot of chunk unnamed-chunk-15

Graph of non-commenters


g_non <- delete.edges(g_non, which(E(g_non)$Weight < 50))
plot_graph(g_non)

plot of chunk unnamed-chunk-16