In this exercise, we will replicate the study by Knox et al. (2019). The article introduces the Patient Preference Trial design, an approach for estimating heterogeneous treatment effects in contexts with self-selection, and applies it in political science. This exercise is based on the code provided in the supplementary materials.
The variables in the data set are defined as follows:
S: stated preference (factor / integer in \(\{0,…,J-1\}\))
D: design arm (0 = free-choice, 1 = forced-exposure)
C: revealed preference / actual choice (only observed when \(D==0\))
A: actual treatment received (randomized when \(D==1\), else \(A=C\))
Y: outcome (continuous in \([0,1]\) or binary). In this example, a sentiment index toward the media (continuous) and whether the respondent is at least somewhat likely to discuss the media clip with a friend (binary)
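To make these definitions concrete, here is a minimal illustrative sketch (invented values, not the actual data) of how a few respondents would be coded, using the one-indexed treatment values from the replication code:

# toy data frame, for illustration only
d.example <- data.frame(
  S = c(1, 3, 2),          # stated preference
  D = c(0, 1, 1),          # design arm: 0 = free choice, 1 = forced exposure
  C = c(1, NA, NA),        # revealed choice, only observed when D == 0
  A = c(1, 3, 2),          # treatment received: A = C when D == 0, randomized when D == 1
  Y = c(0.71, 0.50, 0.85)  # outcome, e.g. the sentiment index
)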
1.1 Loading dependencies
## note: S_i, C_i, A_i are zero-indexed in paper but one-indexed in code
'%.%' <- function(x, y) paste(x, y, sep = '')

# Loading packages
library(reshape2)
library(plyr)
library(tidyverse)
library(janitor)
library(xtable)
library(MASS)    # for mvrnorm()
library(gtools)  # for rdirichlet()
library(lpSolve)
library(sjPlot)

# Loading functions
devtools::source_url("https://raw.githubusercontent.com/dcknox/ppt/master/functions/bounds_frechet_function.R")
devtools::source_url("https://raw.githubusercontent.com/dcknox/ppt/master/functions/bounds_binary_function.R")

options(download.file.method = "libcurl", timeout = 600)

load_rds_remote <- function(url) {
  tf <- tempfile(fileext = ".rds")
  tryCatch({
    utils::download.file(url, tf, mode = "wb", quiet = TRUE)
    readRDS(tf)
  }, error = function(e) {
    # Fallbacks if download.file fails in your knit environment
    if (!requireNamespace("curl", quietly = TRUE)) install.packages("curl")
    con <- curl::curl(url)
    on.exit(try(close(con), silent = TRUE), add = TRUE)
    raw <- readBin(con, what = "raw", n = 1e8)
    writeBin(raw, tf)
    readRDS(tf)
  })
}

# Creating directory to save the results
if (!dir.exists('results')) {
  dir.create('results')
}

############# style #############
red_dark      <- '#A53F4F'
red_medium    <- '#A31F34'
red_light     <- '#A9606C'
blue_dark     <- '#0B356F'
blue_medium   <- '#315485'
blue_lightest <- '#8F9AAA'
grey_light    <- '#C2C0BF'
grey_dark     <- '#8A8B8C'
1.2 Data Cleaning
Let’s start by downloading the data set and preprocessing it for the analysis.
# Reading data
url <- "https://raw.githubusercontent.com/dcknox/ppt/master/data/Spring2014omnibus_raw.csv"
d.raw <- read.csv(url, stringsAsFactors = FALSE, check.names = FALSE)

## media assignment variables: convert NA/1 to 0/1
d.raw$fox[is.na(d.raw$fox)] <- 0
d.raw$msnbc[is.na(d.raw$msnbc)] <- 0
d.raw$entertainment[is.na(d.raw$entertainment)] <- 0
d.raw$med_choice[is.na(d.raw$med_choice)] <- 0  # 0 if no choice given, else 1-4

## construct 'stated preference' variable
d.raw$med_pref[which(d.raw$med_pref == 4)] <- 3
d.raw$med_pref[which(d.raw$med_pref == 0)] <- NA

## construct 'actual choice' variable
## (pool both entertainment options in free-choice arm,
##  already pooled in forced-choice arm)
d.raw$med_choice[which(d.raw$med_choice == 4)] <- 3
d.raw$med_choice[which(d.raw$med_choice == 0)] <- NA

## construct 'received treatment' assignment variable
## (A = C if D = 0, A ~ cat(1/3, 1/3, 1/3) if D = 1)
## (fox = 1, msnbc = 2, entertainment = 3)
d.raw$med_assign <- ifelse(d.raw$forcedchoice == 1,
                           d.raw$fox + 2 * d.raw$msnbc + 3 * d.raw$entertainment,
                           d.raw$med_choice)

## construct party id variable
d.raw$pid <- NA
d.raw$pid[d.raw$party1 == 1] <- -1  # considers self a democrat
d.raw$pid[d.raw$party1 == 2] <-  1  # considers self a republican
d.raw$pid[d.raw$party4 == 1] <-  1  # neither, but closer to republicans
d.raw$pid[d.raw$party4 == 2] <- -1  # neither, but closer to democrats
d.raw$pid[d.raw$party4 == 3] <-  0  # closer to neither party

## construct pro/counter-attitudinal media variable
## pro-attitudinal = 1 (fox for reps, or msnbc for dems)
## counter-attitudinal = 2 (msnbc for reps, or fox for dems)
## entertainment = 3 (for both parties)
med_labels <- c('pro-attitudinal', 'counter-attitudinal', 'entertainment')
d.raw <- d.raw[-which(d.raw$pid == 0 | is.na(d.raw$pid)), ]  # drop independents
d.raw$med_pref.att <- ifelse((d.raw$med_pref == 1 & d.raw$pid == 1) |
                               (d.raw$med_pref == 2 & d.raw$pid == -1), 1,
                             ifelse((d.raw$med_pref == 2 & d.raw$pid == 1) |
                                      (d.raw$med_pref == 1 & d.raw$pid == -1), 2, 3))
d.raw$med_choice.att <- ifelse((d.raw$med_choice == 1 & d.raw$pid == 1) |
                                 (d.raw$med_choice == 2 & d.raw$pid == -1), 1,
                               ifelse((d.raw$med_choice == 2 & d.raw$pid == 1) |
                                        (d.raw$med_choice == 1 & d.raw$pid == -1), 2, 3))
d.raw$med_assign.att <- ifelse((d.raw$med_assign == 1 & d.raw$pid == 1) |
                                 (d.raw$med_assign == 2 & d.raw$pid == -1), 1,
                               ifelse((d.raw$med_assign == 2 & d.raw$pid == 1) |
                                        (d.raw$med_assign == 1 & d.raw$pid == -1), 2, 3))

## construct media sentiment index and rescale to [0,1]
## these are eight questions on perception of media (7-point scales)
## flip all questions so larger numbers mean 'media is good'
d.raw$perceive.index <- rowSums(cbind(d.raw$fair_4,
                                      d.raw$friendly_4,
                                      d.raw$good_4,
                                      d.raw$quarrel_4 * -1 + 8,
                                      d.raw$balanced_4,
                                      d.raw$oneside_4 * -1 + 8,
                                      d.raw$american_4,
                                      d.raw$accurate_4))
d.raw$perceive.index <- (d.raw$perceive.index - 8) / -48 + 1

tab_df(d.raw |> clean_names() |> dplyr::slice_head(n = 10))
Output: the first 10 rows of the cleaned data, with columns x, forcedchoice, fox, msnbc, entertainment, med_pref, med_choice, actions_discuss, the eight media-perception items (fair_4, friendly_4, good_4, quarrel_4, balanced_4, oneside_4, american_4, accurate_4), party1, party4, med_assign, pid, med_pref_att, med_choice_att, med_assign_att, and perceive_index.
Now let’s split the data set for the specific analysis of each outcome variable. We will start by building the data set for the continuous outcome in \([0,1]\), which will be stored in the d.frechet object.
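The construction of d.frechet is not included in the extracted code; below is a minimal sketch, mirroring the d.binary construction shown afterwards and assuming the same NA filtering (the original replication code may differ in detail):

# continuous-outcome data set: media sentiment index as Y (sketch)
d.frechet <- data.frame(
  S = d.raw$med_pref.att,    # stated preference
  C = d.raw$med_choice.att,  # revealed choice (free-choice arm only)
  A = d.raw$med_assign.att,  # treatment received
  D = d.raw$forcedchoice,    # design arm
  Y = d.raw$perceive.index   # sentiment index in [0,1]
)
# drop rows with missing values (C may be NA in the forced-exposure arm)
drop <- which(is.na(d.frechet$S) | is.na(d.frechet$D) | is.na(d.frechet$A) |
                is.na(d.frechet$Y) | (is.na(d.frechet$C) & d.frechet$D == 0))
if (length(drop) > 0) {
  d.frechet <- d.frechet[-drop, ]
}
rm(drop)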
Next, let’s build the data set for the binary outcome. This will be stored in the d.binary object.
## at least somewhat likely to discuss clip with friend
d.binary <- data.frame(
  S = d.raw$med_pref.att,
  C = d.raw$med_choice.att,
  A = d.raw$med_assign.att,
  D = d.raw$forcedchoice
)
d.binary$Y <- d.raw$actions_discuss <= 3  # not likely/not sure = 0, else = 1

drop <- which(is.na(d.binary$S) |
                is.na(d.binary$D) |
                is.na(d.binary$A) |
                is.na(d.binary$Y) |
                (is.na(d.binary$C) & d.binary$D == 0))
if (length(drop) > 0) {
  d.binary <- d.binary[-drop, ]
}
rm(drop)

tab_df(d.binary |> slice_head(n = 10))
First 10 rows of d.binary:

S   C    A   D   Y
1   NA   3   1   FALSE
3   3    3   0   TRUE
3   NA   1   1   TRUE
3   3    3   0   FALSE
3   3    3   0   FALSE
3   NA   2   1   FALSE
3   1    1   0   TRUE
1   NA   3   1   TRUE
2   NA   2   1   TRUE
1   1    1   0   TRUE
Lastly, we will prepare an effect matrix (effects.mat) that specifies the contrasts between treatment values whose effects we want to estimate. It will later be passed as an argument to the functions that estimate the treatment effects; a sketch is given below.
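The code that builds effects.mat is not included in the extracted material, and the exact format expected by the bounds functions is defined in the sourced replication code. As a purely hypothetical sketch, one row per pairwise contrast between the one-indexed treatment conditions might look like this (consult bounds_frechet_function.R, loaded above, for the structure it actually expects):

# hypothetical effect matrix: each row is a pair of treatments to contrast
effects.mat <- rbind(
  c(1, 2),  # pro-attitudinal vs. counter-attitudinal
  c(1, 3),  # pro-attitudinal vs. entertainment
  c(2, 3)   # counter-attitudinal vs. entertainment
)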
Now we will run the analysis for the continuous outcome using the function bounds.frechet, sourced from Dean Knox's GitHub repository. This is a computationally heavy operation and may take a few hours depending on the machine.
if (file.exists('./results/acte_frechet_posterior5k.rds')) {
  acte.frechet <- readRDS('./results/acte_frechet_posterior5k.rds')
} else {
  set.seed(02139)
  acte.frechet <- bounds.frechet(
    data = d.frechet,
    effects.mat = effects.mat,  # effect matrix
    posterior = TRUE,
    n_dir_MC = 500,             # number of simulations
    n_norm_MC = 10,             # number of random normal draws in each simulation
    sensitivity = TRUE,         # compute the sensitivity analysis during the simulations
    rhos = seq(0, .25, .01)
  )
  saveRDS(acte.frechet, './results/acte_frechet_posterior5k.rds')
}
To skip the simulation, we can instead download the pre-computed results from the GitHub page. Let's then plot the effects of partisan media exposure on the sentiment index toward the media.
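The download step itself is not shown in the extracted code; a minimal sketch using the load_rds_remote() helper defined above, with a hypothetical file path in the dcknox/ppt repository (replace with the actual location of the results file):

# download pre-computed continuous-outcome results (hypothetical path)
acte.frechet <- load_rds_remote(
  "https://raw.githubusercontent.com/dcknox/ppt/master/results/acte_frechet_posterior5k.rds"
)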
Now, let’s replicate the sensitivity analysis proposed in the paper for the continuous outcome. As noted above, the function already performed the sensitivity analysis while computing the results, so we only need to extract the relevant parameters again to create the visualization.
Finally, we will run the analysis for the binary outcome using the function bounds.binary, also sourced from Dean Knox's GitHub repository. This operation is even more computationally demanding and may take more than a few hours depending on the machine.
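The call is not included in the extracted code; below is a sketch mirroring the bounds.frechet call above, assuming bounds.binary accepts analogous arguments and using a hypothetical results file name (check the sourced bounds_binary_function.R for its exact signature):

if (file.exists('./results/acte_binary_posterior5k.rds')) {  # hypothetical file name
  acte.binary <- readRDS('./results/acte_binary_posterior5k.rds')
} else {
  set.seed(02139)
  acte.binary <- bounds.binary(
    data = d.binary,            # binary-outcome data set built above
    effects.mat = effects.mat,  # same effect matrix as before
    posterior = TRUE,
    n_dir_MC = 500,             # number of simulations (assumed, as in the continuous case)
    sensitivity = TRUE,
    rhos = seq(0, .25, .01)
  )
  saveRDS(acte.binary, './results/acte_binary_posterior5k.rds')
}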
Again, we will skip the estimation and download the results from Knox's GitHub. We then compute the parameters and create the visualization of the effects.
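As before, a minimal sketch with a hypothetical path (replace with the actual location of the binary results file):

# download pre-computed binary-outcome results (hypothetical path)
acte.binary <- load_rds_remote(
  "https://raw.githubusercontent.com/dcknox/ppt/master/results/acte_binary_posterior5k.rds"
)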
Let’s again replicate the sensitivity analysis proposed in the paper, this time for the binary outcome. As before, the function already performed the sensitivity analysis when computing the results, so we simply repeat the extraction and visualization process.