Outcome function
#' Outcome function
#'
#' Evaluates `0.8 * n_logon * p_logon`. The arithmetic is vectorised, so
#' vector inputs give a vector result. All other parameters are accepted but
#' are not used by the current formula. Because every argument defaults to
#' `NULL`, omitting `n_logon` or `p_logon` yields a zero-length numeric.
#'
#' @param p_logon Numeric; multiplied with `n_logon`.
#' @param n_logon Numeric; multiplied with `p_logon`.
#' @param t_browse,p_feed_type,n_browse,n_engage,p_engage_type,n_share,
#'   p_postshare,p_postshare_type,t_fb Currently unused.
#' @return Numeric value(s) `0.8 * n_logon * p_logon`.
outcome_fun <- function(p_logon = NULL,
                        n_logon = NULL,
                        t_browse = NULL,
                        p_feed_type = NULL,
                        n_browse = NULL,
                        n_engage = NULL,
                        p_engage_type = NULL,
                        n_share = NULL,
                        p_postshare = NULL,
                        p_postshare_type = NULL,
                        t_fb = NULL) {
  scale_factor <- 0.8
  # Same left-to-right multiplication order as before, so floating-point
  # results are unchanged.
  scale_factor * n_logon * p_logon
}
Example Output
# Example call: evaluates 0.8 * 5000 * 0.4.
outcome_fun(p_logon = 0.4, n_logon = 5000)
## [1] 1600
Group means and standard deviations (SDs). Should these be supplied as a CSV file?
#' Simulate individuals per group and summarise the outcome
#'
#' For every distinct value of `df$group`, 10000 individuals are simulated
#' from that group's parameter means and sds, `outcome_fun()` is evaluated
#' for each individual, and the outcome distribution is summarised. A final
#' row labelled "all" summarises the pooled individuals of every group.
#'
#' The random seed is fixed with `set.seed(1)` on every call, so results are
#' reproducible (and the caller's RNG state is reset as a side effect).
#'
#' Only `n_logon` (with `p_logon` held at 0.4) enters `outcome_fun()`. The
#' remaining columns (`t_browse`, `feed_type`, `n_browse`, `n_engage`,
#' `p_engage_type`, `n_share`, `p_postshare`, `p_postshare_type`, `t_fb`)
#' are simulated as a conditional chain but do not affect the summary.
#'
#' @param df Data frame with columns `group`, `parameter`, `mean` and `sd`.
#'   `parameter` holds names such as "n_logon", "t_browse", "p_feed_type",
#'   "n_browse", "n_engage", "p_engage_type", "n_share", "p_postshare",
#'   "p_postshare_type" and "t_fb".
#' @return A list-mode matrix (from `rbind()` over lists) with one row per
#'   group plus an "all" row, and columns `group`, `group_outcome`,
#'   `mean_individual`, `sd_individual`, `min_individual`, `q25_individual`,
#'   `median_individual`, `q75_individual`, `max_individual`.
outcomes_summary <- function(df) {
  # Fixed seed so that repeated calls give the same simulated draws.
  set.seed(1)

  n_individuals <- 10000
  # p_logon is held constant for every individual; the group's "p_logon"
  # mean/sd are not used for a per-individual draw.
  p_logon_fixed <- 0.4

  groups <- unique(df$group)
  # A `for` loop over a factor yields character labels; keep that labelling.
  if (is.factor(groups)) {
    groups <- as.character(groups)
  }
  if (length(groups) == 0) {
    stop("`df` contains no groups to summarise.", call. = FALSE)
  }

  # Mean / sd of one named parameter within a group's rows.
  param_mean <- function(pars, name) pars$mean[pars$parameter == name]
  param_sd <- function(pars, name) pars$sd[pars$parameter == name]

  # Give every position flagged in `active` its own fresh draw and leave the
  # rest at 0. Assigning by logical index keeps one independent draw per
  # active individual; `ifelse()` with a shorter `no` vector would recycle
  # and misalign the draws.
  fill_active <- function(active, draw) {
    values <- numeric(length(active))
    values[active] <- draw(sum(active))
    values
  }

  # Distribution summary shared by the per-group rows and the "all" row.
  summarise_outcome <- function(x) {
    list(
      mean_individual = mean(x),
      sd_individual = sd(x),
      min_individual = min(x),
      q25_individual = unname(quantile(x, probs = 0.25)),
      median_individual = median(x),
      q75_individual = unname(quantile(x, probs = 0.75)),
      max_individual = max(x)
    )
  }

  # Results are stored by position, not by group value, so character,
  # factor, zero or non-contiguous group ids all work.
  summaries <- vector("list", length(groups) + 1)
  simulations <- vector("list", length(groups))
  group_outcomes <- vector("list", length(groups))

  for (k in seq_along(groups)) {
    grp <- groups[[k]]
    pars <- df[df$group == grp, ]

    # Factory for (optionally integer-truncated) normal draws of a parameter.
    normal_draw <- function(name, as_int) {
      function(n) {
        x <- rnorm(
          n = n,
          mean = param_mean(pars, name),
          sd = param_sd(pars, name)
        )
        if (as_int) as.integer(x) else x
      }
    }

    # Outcome evaluated at the group's mean number of logons.
    group_outcomes[[k]] <- outcome_fun(
      n_logon = param_mean(pars, "n_logon"),
      p_logon = p_logon_fixed
    )

    # Number of FB logins in a time range.
    sim <- data.frame(n_logon = normal_draw("n_logon", TRUE)(n_individuals))
    # Time spent on browsing posts per login; only when n_logon != 0.
    sim$t_browse <- fill_active(
      sim$n_logon != 0, normal_draw("t_browse", TRUE)
    )
    # Whether the post type is on the user's feed (Bernoulli draw with
    # probability "p_feed_type"); only when t_browse != 0.
    sim$feed_type <- fill_active(
      sim$t_browse != 0,
      function(n) {
        rbinom(n = n, size = 1, prob = param_mean(pars, "p_feed_type"))
      }
    )
    # Browsing speed: posts browsed per time unit; only when feed_type != 0.
    sim$n_browse <- fill_active(
      sim$feed_type != 0, normal_draw("n_browse", TRUE)
    )
    # Posts the user engages with in a time unit; only when n_browse != 0.
    sim$n_engage <- fill_active(
      sim$n_browse != 0, normal_draw("n_engage", TRUE)
    )
    # Proportion of engaged posts that are of the type; only when
    # n_engage != 0.
    sim$p_engage_type <- fill_active(
      sim$n_engage != 0, normal_draw("p_engage_type", FALSE)
    )
    # Number of FB sharings in a time unit; only when p_engage_type != 0.
    sim$n_share <- fill_active(
      sim$p_engage_type != 0, normal_draw("n_share", TRUE)
    )
    # Proportion of post sharings vs. other sharings; only when n_share != 0.
    sim$p_postshare <- fill_active(
      sim$n_share != 0, normal_draw("p_postshare", FALSE)
    )
    # Proportion of shared posts that are of the type; only when
    # p_postshare != 0.
    sim$p_postshare_type <- fill_active(
      sim$p_postshare != 0, normal_draw("p_postshare_type", FALSE)
    )
    # Time spent on FB per login; only when p_postshare_type != 0.
    sim$t_fb <- fill_active(
      sim$p_postshare_type != 0, normal_draw("t_fb", TRUE)
    )

    sim$outcome <- outcome_fun(n_logon = sim$n_logon, p_logon = p_logon_fixed)

    summaries[[k]] <- c(
      list(group = grp, group_outcome = group_outcomes[[k]]),
      summarise_outcome(sim$outcome)
    )
    simulations[[k]] <- sim
  }

  pooled <- do.call("rbind", simulations)
  # Every group simulates the same number of individuals, so the pooled
  # group-level outcome is the plain mean of the per-group outcomes (rather
  # than whatever the last loop iteration happened to leave behind).
  summaries[[length(groups) + 1]] <- c(
    list(group = "all", group_outcome = mean(unlist(group_outcomes))),
    summarise_outcome(pooled$outcome)
  )

  do.call("rbind", summaries)
}
# Render the summary returned by outcomes_summary() as a table.
kable(outcomes_summary(df))
| group | group_outcome | mean_individual | sd_individual | min_individual | q25_individual | median_individual | q75_individual | max_individual |
|---|---|---|---|---|---|---|---|---|
| 1 | 32 | 31.81712 | 3.24111422010067 | 20.16 | 29.76 | 31.68 | 33.92 | 44.16 |
| 2 | 32 | 31.806848 | 3.21367243413939 | 18.24 | 29.76 | 31.68 | 33.92 | 43.2 |
| 3 | 32 | 31.833344 | 3.24327600761794 | 17.28 | 29.44 | 32 | 33.92 | 42.88 |
| all | 32 | 31.819104 | 3.23262627692596 | 17.28 | 29.76 | 31.68 | 33.92 | 44.16 |