Outcome function

#' Compute the outcome from log-on activity
#'
#' Only `n_logon` and `p_logon` enter the calculation at present; every other
#' argument is accepted so callers can already pass it, but it is not used.
#'
#' @param p_logon,n_logon Numeric values (scalars or vectors) that are
#'   multiplied together and by a fixed factor of 0.8.
#' @param t_browse,p_feed_type,n_browse,n_engage,p_engage_type,n_share,
#'   p_postshare,p_postshare_type,t_fb Accepted but currently ignored.
#' @return `0.8 * n_logon * p_logon`; a zero-length numeric when either of the
#'   two used arguments is left at its `NULL` default.
outcome_fun <- function(p_logon = NULL,
                        n_logon = NULL,
                        t_browse = NULL,
                        p_feed_type = NULL,
                        n_browse = NULL,
                        n_engage = NULL,
                        p_engage_type = NULL,
                        n_share = NULL,
                        p_postshare = NULL,
                        p_postshare_type = NULL,
                        t_fb = NULL) {
  # Fixed multiplier applied to the product of the two log-on inputs.
  logon_weight <- 0.8

  logon_weight * n_logon * p_logon
}

Example Output

# Example: with these inputs the function evaluates 0.8 * 5000 * 0.4.
outcome_fun(n_logon = 5000, p_logon = 0.4)
## [1] 1600

Group means and standard deviations (SDs). Open question: should these be read from a CSV file?

#' Simulate individuals per group and summarise their outcomes
#'
#' For every distinct value of `df$group` this computes the group-level
#' outcome from the group's mean `n_logon`, simulates `n_individuals`
#' individuals, and summarises the simulated outcomes. A final "all" row
#' summarises the simulated individuals of every group pooled together.
#'
#' The simulated variables form a chain: each one is only drawn for
#' individuals whose previous variable is non-zero and is 0 otherwise
#' (n_logon -> t_browse -> feed_type -> n_browse -> n_engage ->
#' p_engage_type -> n_share -> p_postshare -> p_postshare_type -> t_fb).
#' Only `n_logon` currently feeds into the outcome.
#'
#' @param df Data frame with columns `group`, `parameter`, `mean` and `sd`.
#'   Within each group the parameters n_logon, t_browse, n_browse, n_engage,
#'   p_engage_type, n_share, p_postshare, p_postshare_type and t_fb need a
#'   `mean` and an `sd`; p_feed_type needs a `mean` (used as a probability).
#' @param n_individuals Number of individuals simulated per group.
#' @param seed Value passed to `set.seed()` before simulating, which resets
#'   the session's RNG state. Use `NULL` to leave the RNG untouched.
#' @return A list-mode matrix with one row per group plus a final "all" row
#'   and the columns group, group_outcome, mean_individual, sd_individual,
#'   min_individual, q25_individual, median_individual, q75_individual and
#'   max_individual.
outcomes_summary <- function(df, n_individuals = 10000, seed = 1) {
  required_cols <- c("group", "parameter", "mean", "sd")
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "`df` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  stopifnot(
    is.numeric(n_individuals),
    length(n_individuals) == 1L,
    !is.na(n_individuals),
    n_individuals >= 1
  )

  groups <- unique(df$group)
  if (is.factor(groups)) {
    # Report factor groups by label rather than by internal integer code.
    groups <- as.character(groups)
  }
  if (length(groups) == 0) {
    stop("`df` contains no groups to summarise.", call. = FALSE)
  }

  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Log-on probability is fixed for now rather than simulated per individual.
  p_logon <- 0.4

  # Look up one column ("mean" or "sd") for a named parameter of a group.
  get_param <- function(df_group, column, name) {
    value <- df_group[[column]][df_group$parameter == name]
    if (length(value) == 0) {
      stop(
        "No `", column, "` found for parameter \"", name, "\".",
        call. = FALSE
      )
    }
    value
  }

  # Draw fresh values only for the active individuals; everyone else gets 0.
  # Assigning by index (instead of `ifelse()` with a shorter `no` vector,
  # which would be recycled by position) gives each active individual its
  # own independent draw.
  draw_where <- function(active, draw, ...) {
    values <- numeric(length(active))
    values[active] <- draw(sum(active), ...)
    values
  }

  # Simulate the full chain of variables for the individuals of one group.
  simulate_group <- function(df_group) {
    draw_norm <- function(n, name) {
      rnorm(
        n = n,
        mean = get_param(df_group, "mean", name),
        sd = get_param(df_group, "sd", name)
      )
    }
    # Count-like variables are normal draws truncated towards zero.
    draw_count <- function(n, name) {
      as.integer(draw_norm(n, name))
    }
    draw_flag <- function(n, name) {
      rbinom(n = n, size = 1, prob = get_param(df_group, "mean", name))
    }

    # Number of FB logins in a time range.
    sim <- data.frame(n_logon = draw_count(n_individuals, "n_logon"))
    # Time spent on browsing posts per login.
    sim$t_browse <- draw_where(sim$n_logon != 0, draw_count, "t_browse")
    # Whether posts of the type appear on the user's feed.
    sim$feed_type <- draw_where(sim$t_browse != 0, draw_flag, "p_feed_type")
    # Browsing speed: number of posts browsed per time unit.
    sim$n_browse <- draw_where(sim$feed_type != 0, draw_count, "n_browse")
    # Number of posts the user engages with in a time unit.
    sim$n_engage <- draw_where(sim$n_browse != 0, draw_count, "n_engage")
    # Proportion of engaged-with posts that are of the type.
    sim$p_engage_type <- draw_where(
      sim$n_engage != 0, draw_norm, "p_engage_type"
    )
    # Number of FB sharings in a time unit.
    sim$n_share <- draw_where(sim$p_engage_type != 0, draw_count, "n_share")
    # Proportion of post sharings vs. other sharings.
    sim$p_postshare <- draw_where(sim$n_share != 0, draw_norm, "p_postshare")
    # Proportion of shared posts that are of the type.
    sim$p_postshare_type <- draw_where(
      sim$p_postshare != 0, draw_norm, "p_postshare_type"
    )
    # Time spent on FB per login.
    sim$t_fb <- draw_where(sim$p_postshare_type != 0, draw_count, "t_fb")

    sim$outcome <- outcome_fun(n_logon = sim$n_logon, p_logon = p_logon)
    sim
  }

  # Build one summary row from a vector of simulated outcomes.
  summarise_outcome <- function(label, group_outcome, outcome) {
    list(
      group = label,
      group_outcome = group_outcome,
      mean_individual = mean(outcome),
      sd_individual = sd(outcome),
      min_individual = min(outcome),
      q25_individual = unname(quantile(outcome, probs = 0.25)),
      median_individual = median(outcome),
      q75_individual = unname(quantile(outcome, probs = 0.75)),
      max_individual = max(outcome)
    )
  }

  n_groups <- length(groups)
  simulations <- vector("list", n_groups)
  group_outcomes <- vector("list", n_groups)
  summaries <- vector("list", n_groups + 1L)

  # Results are stored by position, so group labels may be of any type.
  for (k in seq_len(n_groups)) {
    df_group <- df[df$group == groups[[k]], ]

    group_outcomes[[k]] <- outcome_fun(
      n_logon = get_param(df_group, "mean", "n_logon"),
      p_logon = p_logon
    )
    simulations[[k]] <- simulate_group(df_group)
    summaries[[k]] <- summarise_outcome(
      groups[[k]], group_outcomes[[k]], simulations[[k]]$outcome
    )
  }

  pooled <- do.call("rbind", simulations)
  # Every group contributes the same number of individuals, so the pooled
  # group-level outcome is the plain mean of the per-group outcomes.
  summaries[[n_groups + 1L]] <- summarise_outcome(
    "all", mean(unlist(group_outcomes)), pooled$outcome
  )

  do.call("rbind", summaries)
}
# Summarise every group in `df` and render the result as a table.
# NOTE(review): `df`, `%>%` and `kable()` are not defined in this excerpt --
# presumably `df` is built earlier and magrittr/dplyr and knitr are attached;
# confirm against the full document.
outcomes_summary(df) %>%
  kable()
group group_outcome mean_individual sd_individual min_individual q25_individual median_individual q75_individual max_individual
1 32 31.81712 3.24111422010067 20.16 29.76 31.68 33.92 44.16
2 32 31.806848 3.21367243413939 18.24 29.76 31.68 33.92 43.2
3 32 31.833344 3.24327600761794 17.28 29.44 32 33.92 42.88
all 32 31.819104 3.23262627692596 17.28 29.76 31.68 33.92 44.16