1. Replication

The following code replicates the set-up of the Bail et al. (2018) study. Our results are nearly identical, although not entirely. This could be due to the fact that many covariates are not included in the replication materials.

Compliance

#Compliance measure
twitter_data$complier_scale<-0
twitter_data$complier_scale <-
rowSums(twitter_data[,c("substantive_question_correct_wave_2",
"substantive_question_correct_wave_3",
"substantive_question_correct_wave_4",
"animal_correct_wave_2",
"animal_correct_wave_3",
"animal_correct_wave_4")], na.rm=TRUE)
#partial complier dummy
twitter_data$half_complier<-0
twitter_data$half_complier[twitter_data$complier_scale>0&
twitter_data$complier_scale<6]<-1
#construct full complier dummy
twitter_data$perfect_complier<-0
twitter_data$perfect_complier[twitter_data$complier_scale==6]<-1
partial_compliance_rate<-nrow(twitter_data[(twitter_data$half_complier==1),])/
nrow(twitter_data[(twitter_data$half_complier==0),])
full_compliance_rate<-nrow(twitter_data[(twitter_data$perfect_complier==1),])/
nrow(twitter_data[(twitter_data$perfect_complier==0),])

Outcome and Control Variables

#invert questions that prime liberal values
twitter_data$government_should_regulate_businesses_wave_1<-
8-twitter_data$government_should_regulate_businesses_wave_1
twitter_data$racial_discrimination_hurts_black_people_wave_1<-
8-twitter_data$racial_discrimination_hurts_black_people_wave_1
twitter_data$immigrants_strengthen_country_wave_1<-
8-twitter_data$immigrants_strengthen_country_wave_1
twitter_data$corporations_make_too_much_profit_wave_1<-
8-twitter_data$corporations_make_too_much_profit_wave_1
twitter_data$homosexuality_should_be_accepted_wave_1<-
8-twitter_data$homosexuality_should_be_accepted_wave_1
twitter_data$government_should_regulate_businesses_wave_5<-
8-twitter_data$government_should_regulate_businesses_wave_5
twitter_data$racial_discrimination_hurts_black_people_wave_5<-
8-twitter_data$racial_discrimination_hurts_black_people_wave_5
twitter_data$immigrants_strengthen_country_wave_5<-
8-twitter_data$immigrants_strengthen_country_wave_5
twitter_data$corporations_make_too_much_profit_wave_5<-
8-twitter_data$corporations_make_too_much_profit_wave_5
twitter_data$homosexuality_should_be_accepted_wave_5<-
8-twitter_data$homosexuality_should_be_accepted_wave_5
#calculate chronbach's alpha
alpha_calc<-twitter_data[,c(
"government_should_regulate_businesses_wave_1",
"racial_discrimination_hurts_black_people_wave_1",
"immigrants_strengthen_country_wave_1",
"corporations_make_too_much_profit_wave_1",
"homosexuality_should_be_accepted_wave_1",
"government_wasteful_inefficient_wave_1",
"poor_people_have_it_easy_wave_1",
"government_cannot_afford_to_help_needy_wave_1",
"best_way_peace_military_strength_wave_1",
"stricter_environmental_laws_damaging_wave_1")]
library(psych)
psych::alpha(alpha_calc)

## 
## Reliability analysis   
## Call: psych::alpha(x = alpha_calc)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N    ase mean  sd median_r
##       0.91      0.91    0.91      0.51  10 0.0036  3.4 1.5     0.51
## 
##  lower alpha upper     95% confidence boundaries
## 0.91 0.91 0.92 
## 
##  Reliability if an item is dropped:
##                                                 raw_alpha std.alpha
## government_should_regulate_businesses_wave_1         0.91      0.90
## racial_discrimination_hurts_black_people_wave_1      0.90      0.90
## immigrants_strengthen_country_wave_1                 0.91      0.90
## corporations_make_too_much_profit_wave_1             0.91      0.91
## homosexuality_should_be_accepted_wave_1              0.91      0.91
## government_wasteful_inefficient_wave_1               0.91      0.91
## poor_people_have_it_easy_wave_1                      0.90      0.90
## government_cannot_afford_to_help_needy_wave_1        0.90      0.90
## best_way_peace_military_strength_wave_1              0.90      0.90
## stricter_environmental_laws_damaging_wave_1          0.89      0.89
##                                                 G6(smc) average_r  S/N
## government_should_regulate_businesses_wave_1       0.90      0.51  9.5
## racial_discrimination_hurts_black_people_wave_1    0.90      0.50  9.0
## immigrants_strengthen_country_wave_1               0.90      0.51  9.5
## corporations_make_too_much_profit_wave_1           0.91      0.53 10.0
## homosexuality_should_be_accepted_wave_1            0.91      0.52  9.9
## government_wasteful_inefficient_wave_1             0.91      0.53 10.1
## poor_people_have_it_easy_wave_1                    0.90      0.50  9.1
## government_cannot_afford_to_help_needy_wave_1      0.90      0.51  9.3
## best_way_peace_military_strength_wave_1            0.90      0.51  9.4
## stricter_environmental_laws_damaging_wave_1        0.89      0.49  8.5
##                                                 alpha se  var.r med.r
## government_should_regulate_businesses_wave_1      0.0040 0.0090  0.52
## racial_discrimination_hurts_black_people_wave_1   0.0042 0.0086  0.49
## immigrants_strengthen_country_wave_1              0.0040 0.0082  0.51
## corporations_make_too_much_profit_wave_1          0.0038 0.0069  0.52
## homosexuality_should_be_accepted_wave_1           0.0038 0.0079  0.53
## government_wasteful_inefficient_wave_1            0.0038 0.0066  0.53
## poor_people_have_it_easy_wave_1                   0.0042 0.0074  0.51
## government_cannot_afford_to_help_needy_wave_1     0.0041 0.0079  0.51
## best_way_peace_military_strength_wave_1           0.0040 0.0083  0.51
## stricter_environmental_laws_damaging_wave_1       0.0044 0.0057  0.49
## 
##  Item statistics 
##                                                    n raw.r std.r r.cor
## government_should_regulate_businesses_wave_1    1235  0.73  0.73  0.70
## racial_discrimination_hurts_black_people_wave_1 1238  0.80  0.80  0.78
## immigrants_strengthen_country_wave_1            1234  0.74  0.74  0.70
## corporations_make_too_much_profit_wave_1        1227  0.67  0.68  0.63
## homosexuality_should_be_accepted_wave_1         1230  0.68  0.68  0.63
## government_wasteful_inefficient_wave_1          1234  0.66  0.66  0.61
## poor_people_have_it_easy_wave_1                 1231  0.80  0.79  0.77
## government_cannot_afford_to_help_needy_wave_1   1230  0.77  0.77  0.74
## best_way_peace_military_strength_wave_1         1230  0.76  0.76  0.72
## stricter_environmental_laws_damaging_wave_1     1228  0.87  0.87  0.87
##                                                 r.drop mean  sd
## government_should_regulate_businesses_wave_1      0.66  3.0 1.7
## racial_discrimination_hurts_black_people_wave_1   0.74  4.0 2.1
## immigrants_strengthen_country_wave_1              0.67  2.9 1.8
## corporations_make_too_much_profit_wave_1          0.59  3.3 2.0
## homosexuality_should_be_accepted_wave_1           0.60  2.6 2.0
## government_wasteful_inefficient_wave_1            0.58  4.4 1.9
## poor_people_have_it_easy_wave_1                   0.74  3.2 2.1
## government_cannot_afford_to_help_needy_wave_1     0.71  3.1 2.0
## best_way_peace_military_strength_wave_1           0.69  4.1 2.0
## stricter_environmental_laws_damaging_wave_1       0.83  3.3 2.1
## 
## Non missing response frequency for each item
##                                                    1    2    3    4    5
## government_should_regulate_businesses_wave_1    0.25 0.20 0.22 0.10 0.10
## racial_discrimination_hurts_black_people_wave_1 0.13 0.19 0.20 0.07 0.07
## immigrants_strengthen_country_wave_1            0.28 0.25 0.14 0.14 0.06
## corporations_make_too_much_profit_wave_1        0.23 0.18 0.16 0.16 0.07
## homosexuality_should_be_accepted_wave_1         0.46 0.17 0.07 0.13 0.03
## government_wasteful_inefficient_wave_1          0.07 0.16 0.12 0.11 0.22
## poor_people_have_it_easy_wave_1                 0.32 0.17 0.10 0.07 0.13
## government_cannot_afford_to_help_needy_wave_1   0.31 0.19 0.11 0.10 0.11
## best_way_peace_military_strength_wave_1         0.11 0.17 0.14 0.15 0.16
## stricter_environmental_laws_damaging_wave_1     0.30 0.16 0.10 0.11 0.11
##                                                    6    7 miss
## government_should_regulate_businesses_wave_1    0.08 0.04 0.00
## racial_discrimination_hurts_black_people_wave_1 0.13 0.20 0.00
## immigrants_strengthen_country_wave_1            0.07 0.06 0.00
## corporations_make_too_much_profit_wave_1        0.10 0.09 0.01
## homosexuality_should_be_accepted_wave_1         0.06 0.08 0.01
## government_wasteful_inefficient_wave_1          0.16 0.17 0.00
## poor_people_have_it_easy_wave_1                 0.11 0.10 0.01
## government_cannot_afford_to_help_needy_wave_1   0.11 0.07 0.01
## best_way_peace_military_strength_wave_1         0.13 0.15 0.01
## stricter_environmental_laws_damaging_wave_1     0.12 0.10 0.01

#create average score by wave
twitter_data$substantive_ideology_scale_wave_1<-rowMeans(twitter_data[,c(
"government_should_regulate_businesses_wave_1",
"racial_discrimination_hurts_black_people_wave_1",
"immigrants_strengthen_country_wave_1",
"corporations_make_too_much_profit_wave_1",
"homosexuality_should_be_accepted_wave_1",
"government_wasteful_inefficient_wave_1",
"poor_people_have_it_easy_wave_1",
"government_cannot_afford_to_help_needy_wave_1",
"best_way_peace_military_strength_wave_1",
"stricter_environmental_laws_damaging_wave_1")], na.rm=TRUE)
twitter_data$substantive_ideology_scale_wave_5<-rowMeans(twitter_data[,c(
"government_should_regulate_businesses_wave_5",
"racial_discrimination_hurts_black_people_wave_5",
"immigrants_strengthen_country_wave_5",
"corporations_make_too_much_profit_wave_5",
"homosexuality_should_be_accepted_wave_5",
"government_wasteful_inefficient_wave_5",
"poor_people_have_it_easy_wave_5",
"government_cannot_afford_to_help_needy_wave_5",
"best_way_peace_military_strength_wave_5",
"stricter_environmental_laws_damaging_wave_5")], na.rm=TRUE)
#Control Variables
control_variables<-twitter_data[,c(
"percent_co_party",
"political_wave_1",
"freq_twitter_wave_1",
"friends_count_wave_1",
"birth_year",
"family_income",
"education",
"gender",
"ideo_homogeneity_offline",
"northeast",
"north_central",
"south",
"west",
"caseid",
"bin_maker")]

Missing Data and Imputed Values

#examine missing data in first wave
library(Amelia)

## Loading required package: Rcpp

## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2019 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##

missmap(control_variables,main = fig_title("Missingness Map"))

#impute missing data from first wave
forimputation<-cbind(twitter_data$substantive_ideology_scale_wave_1, control_variables)
colnames(forimputation)[colnames(forimputation)==
"twitter_data$substantive_ideology_scale_wave_1"]<-
"substantive_ideology_scale_wave_1"
library(mice)

## Loading required package: lattice

## 
## Attaching package: 'mice'

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

#prepare variables for imputation
forimputation$caseid<-as.character(forimputation$caseid)
#take log of variables with heavy skew
forimputation$percent_co_party<-log(forimputation$percent_co_party+1)
forimputation$friends_count_wave_1<-log(forimputation$friends_count_wave_1+1)
#impute
imputed_data <- mice(forimputation,m=15,seed=352,
exclude=c("caseid","bin_maker"))

## 
##  iter imp variable
##   1   1  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1*  birth_year  family_income  education  gender  ideo_homogeneity_offline*  northeast*  north_central*  south
##   1   2  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1  birth_year*  family_income*  education*  gender*  ideo_homogeneity_offline*  northeast*  north_central  south*
##   1   3  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year*  family_income*  education*  gender  ideo_homogeneity_offline  northeast  north_central  south*
##   1   4  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1*  birth_year  family_income*  education  gender*  ideo_homogeneity_offline  northeast  north_central*  south*
##   1   5  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1  birth_year*  family_income  education*  gender  ideo_homogeneity_offline  northeast*  north_central*  south
##   1   6  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year*  family_income  education  gender  ideo_homogeneity_offline  northeast*  north_central*  south
##   1   7  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year  family_income*  education  gender*  ideo_homogeneity_offline  northeast  north_central  south
##   1   8  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year*  family_income  education*  gender*  ideo_homogeneity_offline*  northeast*  north_central  south
##   1   9  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1  birth_year  family_income*  education  gender  ideo_homogeneity_offline*  northeast  north_central  south*
##   1   10  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1  birth_year*  family_income  education  gender  ideo_homogeneity_offline*  northeast*  north_central*  south
##   1   11  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year  family_income  education*  gender  ideo_homogeneity_offline  northeast*  north_central  south
##   1   12  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1*  birth_year  family_income  education*  gender  ideo_homogeneity_offline  northeast  north_central  south
##   1   13  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year*  family_income*  education  gender  ideo_homogeneity_offline*  northeast*  north_central  south
##   1   14  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1*  birth_year  family_income  education*  gender  ideo_homogeneity_offline  northeast  north_central  south
##   1   15  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1  birth_year  family_income  education  gender  ideo_homogeneity_offline*  northeast*  north_central*  south
##   2   1  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1*  birth_year*  family_income*  education  gender  ideo_homogeneity_offline  northeast*  north_central*  south
##   2   2  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year  family_income*  education  gender  ideo_homogeneity_offline*  northeast  north_central  south
##   2   3  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1*  birth_year*  family_income*  education*  gender*  ideo_homogeneity_offline*  northeast*  north_central*  south
##   2   4  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year*  family_income*  education*  gender*  ideo_homogeneity_offline  northeast  north_central  south
##   2   5  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year*  family_income*  education*  gender*  ideo_homogeneity_offline  northeast  north_central*  south
##   2   6  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1*  birth_year  family_income  education  gender*  ideo_homogeneity_offline  northeast*  north_central*  south*
##   2   7  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year*  family_income*  education  gender  ideo_homogeneity_offline*  northeast  north_central*  south*
##   2   8  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1*  birth_year  family_income  education*  gender*  ideo_homogeneity_offline  northeast*  north_central  south
##   2   9  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year  family_income  education*  gender*  ideo_homogeneity_offline  northeast*  north_central  south*
##   2   10  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1*  birth_year*  family_income*  education*  gender*  ideo_homogeneity_offline  northeast*  north_central  south
##   2   11  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year*  family_income  education*  gender  ideo_homogeneity_offline  northeast  north_central*  south
##   2   12  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1  birth_year  family_income  education*  gender  ideo_homogeneity_offline*  northeast*  north_central  south*
##   2   13  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year  family_income  education  gender  ideo_homogeneity_offline  northeast  north_central  south
##   2   14  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year*  family_income*  education*  gender*  ideo_homogeneity_offline*  northeast  north_central  south*
##   2   15  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1*  birth_year  family_income  education*  gender*  ideo_homogeneity_offline*  northeast  north_central  south
##   3   1  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year*  family_income  education  gender  ideo_homogeneity_offline*  northeast*  north_central  south
##   3   2  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1*  birth_year*  family_income  education  gender  ideo_homogeneity_offline*  northeast*  north_central  south
##   3   3  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year  family_income*  education  gender  ideo_homogeneity_offline  northeast*  north_central*  south*
##   3   4  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year*  family_income*  education*  gender  ideo_homogeneity_offline*  northeast*  north_central  south
##   3   5  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year*  family_income  education  gender*  ideo_homogeneity_offline*  northeast*  north_central*  south
##   3   6  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year*  family_income  education  gender*  ideo_homogeneity_offline  northeast  north_central*  south*
##   3   7  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1*  birth_year*  family_income*  education*  gender*  ideo_homogeneity_offline  northeast*  north_central*  south
##   3   8  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year*  family_income  education  gender  ideo_homogeneity_offline*  northeast*  north_central  south*
##   3   9  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1*  birth_year*  family_income*  education*  gender  ideo_homogeneity_offline*  northeast  north_central  south
##   3   10  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1  birth_year  family_income*  education  gender  ideo_homogeneity_offline*  northeast*  north_central  south*
##   3   11  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year*  family_income*  education  gender*  ideo_homogeneity_offline  northeast*  north_central  south
##   3   12  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1*  birth_year*  family_income*  education*  gender*  ideo_homogeneity_offline*  northeast*  north_central  south
##   3   13  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1*  birth_year  family_income*  education  gender*  ideo_homogeneity_offline  northeast  north_central  south*
##   3   14  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year  family_income  education*  gender  ideo_homogeneity_offline*  northeast*  north_central  south
##   3   15  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year  family_income  education*  gender*  ideo_homogeneity_offline  northeast*  north_central  south
##   4   1  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year*  family_income  education  gender  ideo_homogeneity_offline*  northeast*  north_central  south
##   4   2  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year*  family_income*  education*  gender  ideo_homogeneity_offline*  northeast  north_central  south
##   4   3  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1  birth_year*  family_income  education  gender*  ideo_homogeneity_offline*  northeast  north_central  south
##   4   4  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1  birth_year*  family_income*  education*  gender*  ideo_homogeneity_offline  northeast*  north_central  south
##   4   5  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1*  birth_year*  family_income  education  gender*  ideo_homogeneity_offline  northeast*  north_central*  south
##   4   6  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1  birth_year*  family_income  education*  gender*  ideo_homogeneity_offline  northeast  north_central  south
##   4   7  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1*  birth_year*  family_income  education*  gender  ideo_homogeneity_offline*  northeast*  north_central  south
##   4   8  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year  family_income  education  gender  ideo_homogeneity_offline  northeast*  north_central  south
##   4   9  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year  family_income*  education  gender  ideo_homogeneity_offline*  northeast  north_central*  south*
##   4   10  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1*  birth_year*  family_income  education*  gender  ideo_homogeneity_offline*  northeast  north_central  south
##   4   11  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year  family_income  education  gender*  ideo_homogeneity_offline  northeast*  north_central  south*
##   4   12  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1  birth_year*  family_income*  education*  gender  ideo_homogeneity_offline  northeast  north_central  south
##   4   13  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1*  birth_year  family_income*  education  gender  ideo_homogeneity_offline*  northeast  north_central  south
##   4   14  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1  birth_year*  family_income*  education  gender*  ideo_homogeneity_offline  northeast  north_central*  south
##   4   15  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year  family_income*  education*  gender*  ideo_homogeneity_offline  northeast*  north_central  south
##   5   1  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year*  family_income  education  gender*  ideo_homogeneity_offline*  northeast  north_central  south*
##   5   2  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1  birth_year*  family_income  education  gender*  ideo_homogeneity_offline*  northeast*  north_central  south
##   5   3  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1  birth_year*  family_income  education  gender  ideo_homogeneity_offline  northeast*  north_central*  south
##   5   4  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year  family_income*  education*  gender  ideo_homogeneity_offline  northeast*  north_central  south
##   5   5  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1  birth_year*  family_income  education*  gender*  ideo_homogeneity_offline  northeast  north_central*  south
##   5   6  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year*  family_income  education*  gender*  ideo_homogeneity_offline  northeast  north_central*  south
##   5   7  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1  birth_year  family_income*  education  gender*  ideo_homogeneity_offline*  northeast*  north_central*  south*
##   5   8  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1  birth_year  family_income*  education*  gender*  ideo_homogeneity_offline  northeast  north_central*  south
##   5   9  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1*  birth_year  family_income*  education  gender  ideo_homogeneity_offline  northeast  north_central*  south
##   5   10  substantive_ideology_scale_wave_1*  percent_co_party  friends_count_wave_1*  birth_year*  family_income  education  gender*  ideo_homogeneity_offline  northeast*  north_central  south*
##   5   11  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1*  birth_year*  family_income*  education  gender*  ideo_homogeneity_offline  northeast  north_central  south
##   5   12  substantive_ideology_scale_wave_1  percent_co_party*  friends_count_wave_1*  birth_year  family_income*  education*  gender*  ideo_homogeneity_offline*  northeast  north_central*  south
##   5   13  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1  birth_year*  family_income*  education  gender  ideo_homogeneity_offline  northeast  north_central  south*
##   5   14  substantive_ideology_scale_wave_1*  percent_co_party*  friends_count_wave_1*  birth_year  family_income*  education*  gender  ideo_homogeneity_offline  northeast  north_central  south
##   5   15  substantive_ideology_scale_wave_1  percent_co_party  friends_count_wave_1*  birth_year*  family_income  education  gender*  ideo_homogeneity_offline  northeast*  north_central  south*
##  * Please inspect the loggedEvents

## Warning: Number of logged events: 1214

imputed_data <- mice::complete(imputed_data,action=15)
twitter_data$strong_partisan_wave_1 <- ifelse(twitter_data$party_id_weak_strong_wave_1 == 1 | twitter_data$party_id_weak_strong_wave_1 == 7, 1, 0)
#reassemble dataaset with additional variables we need for subsequent analysis
to_bind<-twitter_data[,c("treat",
"perfect_complier",
"half_complier",
"bot_followers",
"party_id_wave_1",
"strong_partisan_wave_1",
"substantive_ideology_scale_wave_5",
"endtime_wave_5")]
final_data<- cbind(to_bind, imputed_data)
save(final_data, file="Final Data for Models.Rdata")

2. Heterogenous treatment effects

In order to use various ML methods, we create a high dimensional dataset by including interactions and squared terms in our data.

Creating Interactions

# These are the covariates we'll use

cts_variables_names <- c("percent_co_party",
                         "ideo_homogeneity_offline",
                         "friends_count_wave_1","education","family_income",
                         "birth_year","endtime_wave_5",
                         "substantive_ideology_scale_wave_1")
binary_variables_names <- c("gender","northeast","north_central",
                            "south", "west","party_id_wave_1","political_wave_1", 
                            "strong_partisan_wave_1","freq_twitter_wave_1",
                            "bot_followers", "half_complier", 
                            "perfect_complier",
                           "bin_maker","caseid")
covariates <- c(cts_variables_names, binary_variables_names)
all_variables_names <- c(covariates,
                         "substantive_ideology_scale_wave_5", "treat")

# Extracting continuous variables
cts_covariates <- matrix(final_data %>%
  dplyr::select(cts_variables_names) %>%
  unlist(as.numeric(as.character())),nrow = nrow(final_data))

colnames(cts_covariates) <- cts_variables_names

# Scaling continuous variables
scaled_cts_covariates <- data.frame(cts_covariates %>%
  scale())

# Extracting indicator variables
binary_covariates <- matrix(final_data %>%
  dplyr::select(binary_variables_names) %>%
  unlist(as.character()) %>% as.numeric(),nrow = nrow(final_data))

colnames(binary_covariates) <- binary_variables_names

# Extracting outcome and treatment
outcome <- final_data %>% dplyr::select(substantive_ideology_scale_wave_5)
treatment <- final_data %>% dplyr::select(treat)

# Setting up the data, renaming columns and discarding rows with NA (if any)
df <- data.frame(scaled_cts_covariates, binary_covariates, outcome, treatment) %>%
  plyr::rename(c(substantive_ideology_scale_wave_5 = "Y",
                 treat = "W")) %>%
  na.omit() %>%
  add_count(bin_maker)                          # will look at NA values

# set weights for bin sizes
df$weights <- 1/df$n

3. ITT Effects

This section calculates the intent to treat effects using the Lasso model to select covariates.

ITT set up

################################################################
## ITT
################################################################


## LASSO 

library(amlinear)
library(ivpack)

## Loading required package: AER

## Loading required package: car

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

## The following object is masked from 'package:dplyr':
## 
##     recode

## Loading required package: lmtest

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: survival

# regress treat on interaction variables (seperately 
# for full compliers, half compliers, and bot followers)
# * does it make sense to include news, party, and strength rather than bin?*
df.t <- df[,c("W",
              "substantive_ideology_scale_wave_1",
              "percent_co_party",
              "friends_count_wave_1",
              "birth_year",
              "family_income",
              "education",
              "gender",
              "ideo_homogeneity_offline",
              "northeast",
              "north_central",
              "south",
              "strong_partisan_wave_1",
              "freq_twitter_wave_1",
              "party_id_wave_1",
              "political_wave_1")]


# replace 0s in W with -1
df.t$W = df.t$W -1*(1-df.t$W)

df.t.int <- model.matrix(~ . * ., data = df.t)

df.t.2 <- sapply(df.t[,c("percent_co_party","ideo_homogeneity_offline",
                         "friends_count_wave_1","education","family_income",
                         "birth_year","substantive_ideology_scale_wave_1")],
                 function(x) x^2)

df.t.mod <- cbind(df.t.int,df.t.2)

ITT Models

# try LASSO model to select instruments
library(glmnet)

# create hyperparameter grid
hyper_grid <- expand.grid(
  alpha = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
  nfolds = c(5,10)
)

# create nonzero results grid

non_zero.t <- matrix(nrow = ncol(df.t.mod),ncol = 22)

for(i in 1:nrow(hyper_grid)){
  lasso.fit.t <- cv.glmnet(df.t.mod, df$Y, weights = df$weights, 
                         lambda = seq(0.001,0.1,by = 0.001),
                         alpha = hyper_grid$alpha[i], 
                         nfolds = hyper_grid$nfolds[i])
  
  coef.t <- coef(lasso.fit.t, s = "lambda.min")
  coef.t[which(coef.t != 0)]
  vars.t <- coef.t@Dimnames[[1]][which(coef.t != 0 ) ]
  #print(vars.t)   ### should do cool graph here
  non_zero.t[,i] <- colnames(df.t.mod) %in% vars.t
}

# examine number of times each covariate shows up in models
rownames(non_zero.t) <- colnames(df.t.mod)
sort(apply(non_zero.t,1,sum))

##                                    northeast:north_central 
##                                                          0 
##                                            northeast:south 
##                                                          0 
##                                                          W 
##                                                          2 
##                                           percent_co_party 
##                                                          2 
##                                       friends_count_wave_1 
##                                                          2 
##                                              family_income 
##                                                          2 
##                                                     gender 
##                                                          2 
##                                                  northeast 
##                                                          2 
##                                              north_central 
##                                                          2 
##                                                      south 
##                                                          2 
##                                     strong_partisan_wave_1 
##                                                          2 
##                                        freq_twitter_wave_1 
##                                                          2 
##                                               W:birth_year 
##                                                          2 
##                                            W:north_central 
##                                                          2 
##                                                    W:south 
##                                                          2 
##                                      W:freq_twitter_wave_1 
##                                                          2 
##                                         W:political_wave_1 
##                                                          2 
##     substantive_ideology_scale_wave_1:friends_count_wave_1 
##                                                          2 
##               substantive_ideology_scale_wave_1:birth_year 
##                                                          2 
## substantive_ideology_scale_wave_1:ideo_homogeneity_offline 
##                                                          2 
##                             percent_co_party:family_income 
##                                                          2 
##                                    percent_co_party:gender 
##                                                          2 
##                           percent_co_party:party_id_wave_1 
##                                                          2 
##                         friends_count_wave_1:family_income 
##                                                          2 
##                             friends_count_wave_1:education 
##                                                          2 
##                                friends_count_wave_1:gender 
##                                                          2 
##                         friends_count_wave_1:north_central 
##                                                          2 
##                                 friends_count_wave_1:south 
##                                                          2 
##                   friends_count_wave_1:freq_twitter_wave_1 
##                                                          2 
##                       friends_count_wave_1:party_id_wave_1 
##                                                          2 
##                      friends_count_wave_1:political_wave_1 
##                                                          2 
##                                   birth_year:family_income 
##                                                          2 
##                                          birth_year:gender 
##                                                          2 
##                        birth_year:ideo_homogeneity_offline 
##                                                          2 
##                                       birth_year:northeast 
##                                                          2 
##                          birth_year:strong_partisan_wave_1 
##                                                          2 
##                             birth_year:freq_twitter_wave_1 
##                                                          2 
##                                birth_year:political_wave_1 
##                                                          2 
##                                family_income:north_central 
##                                                          2 
##                              family_income:party_id_wave_1 
##                                                          2 
##                             family_income:political_wave_1 
##                                                          2 
##                                           education:gender 
##                                                          2 
##                                        education:northeast 
##                                                          2 
##                           education:strong_partisan_wave_1 
##                                                          2 
##                              education:freq_twitter_wave_1 
##                                                          2 
##                                  education:party_id_wave_1 
##                                                          2 
##                            gender:ideo_homogeneity_offline 
##                                                          2 
##                                       gender:north_central 
##                                                          2 
##                                               gender:south 
##                                                          2 
##                              gender:strong_partisan_wave_1 
##                                                          2 
##                                 gender:freq_twitter_wave_1 
##                                                          2 
##                                     gender:party_id_wave_1 
##                                                          2 
##                             ideo_homogeneity_offline:south 
##                                                          2 
##               ideo_homogeneity_offline:freq_twitter_wave_1 
##                                                          2 
##                   ideo_homogeneity_offline:party_id_wave_1 
##                                                          2 
##                  ideo_homogeneity_offline:political_wave_1 
##                                                          2 
##                           northeast:strong_partisan_wave_1 
##                                                          2 
##                              northeast:freq_twitter_wave_1 
##                                                          2 
##                                  northeast:party_id_wave_1 
##                                                          2 
##                                 northeast:political_wave_1 
##                                                          2 
##                                        north_central:south 
##                                                          2 
##                       north_central:strong_partisan_wave_1 
##                                                          2 
##                          north_central:freq_twitter_wave_1 
##                                                          2 
##                             north_central:political_wave_1 
##                                                          2 
##                                      south:party_id_wave_1 
##                                                          2 
##                 strong_partisan_wave_1:freq_twitter_wave_1 
##                                                          2 
##                    strong_partisan_wave_1:political_wave_1 
##                                                          2 
##                           party_id_wave_1:political_wave_1 
##                                                          2 
##                                           percent_co_party 
##                                                          2 
##                                       friends_count_wave_1 
##                                                          2 
##                                              family_income 
##                                                          2 
##                                                   W:gender 
##                                                          3 
##                                 W:ideo_homogeneity_offline 
##                                                          3 
##         substantive_ideology_scale_wave_1:political_wave_1 
##                                                          3 
##                            friends_count_wave_1:birth_year 
##                                                          3 
##                             friends_count_wave_1:northeast 
##                                                          3 
##                     family_income:ideo_homogeneity_offline 
##                                                          3 
##                                 education:political_wave_1 
##                                                          3 
##                                                  education 
##                                                          4 
##                                                W:northeast 
##                                                          4 
##            substantive_ideology_scale_wave_1:north_central 
##                                                          4 
##                  percent_co_party:ideo_homogeneity_offline 
##                                                          4 
##                       percent_co_party:freq_twitter_wave_1 
##                                                          4 
##              friends_count_wave_1:ideo_homogeneity_offline 
##                                                          4 
##                                   birth_year:north_central 
##                                                          4 
##                                    gender:political_wave_1 
##                                                          4 
##                              north_central:party_id_wave_1 
##                                                          4 
##                                                  education 
##                                                          4 
##                                         W:percent_co_party 
##                                                          5 
##                                     W:friends_count_wave_1 
##                                                          5 
##                                            W:family_income 
##                                                          5 
##                                 percent_co_party:education 
##                                                          5 
##                                 percent_co_party:northeast 
##                                                          5 
##                                           birth_year:south 
##                                                          5 
##                                       family_income:gender 
##                                                          5 
##                                        family_income:south 
##                                                          5 
##                       family_income:strong_partisan_wave_1 
##                                                          5 
##                         education:ideo_homogeneity_offline 
##                                                          5 
##                                    education:north_central 
##                                                          5 
##                     ideo_homogeneity_offline:north_central 
##                                                          5 
##            ideo_homogeneity_offline:strong_partisan_wave_1 
##                                                          5 
##                               south:strong_partisan_wave_1 
##                                                          5 
##                                  south:freq_twitter_wave_1 
##                                                          5 
##                        W:substantive_ideology_scale_wave_1 
##                                                          6 
##                      percent_co_party:friends_count_wave_1 
##                                                          6 
##                          percent_co_party:political_wave_1 
##                                                          6 
##                                       birth_year:education 
##                                                          6 
##                                 birth_year:party_id_wave_1 
##                                                          6 
##                         ideo_homogeneity_offline:northeast 
##                                                          6 
##                                           political_wave_1 
##                                                          7 
##                                     percent_co_party:south 
##                                                          7 
##                                   ideo_homogeneity_offline 
##                                                          8 
##                                    family_income:education 
##                                                          8 
##                                   ideo_homogeneity_offline 
##                                                          8 
##                                                W:education 
##                                                          9 
##                                           gender:northeast 
##                                                          9 
##                                                 birth_year 
##                                                         11 
##                             percent_co_party:north_central 
##                                                         11 
##                                                 birth_year 
##                                                         11 
##                   substantive_ideology_scale_wave_1:gender 
##                                                         13 
##                    substantive_ideology_scale_wave_1:south 
##                                                         13 
##                                            education:south 
##                                                         13 
##                friends_count_wave_1:strong_partisan_wave_1 
##                                                         14 
##                          family_income:freq_twitter_wave_1 
##                                                         14 
##                       freq_twitter_wave_1:political_wave_1 
##                                                         14 
##                                percent_co_party:birth_year 
##                                                         15 
##                                   W:strong_partisan_wave_1 
##                                                         16 
##            substantive_ideology_scale_wave_1:family_income 
##                                                         16 
##                substantive_ideology_scale_wave_1:northeast 
##                                                         17 
##                                    family_income:northeast 
##                                                         18 
##                                     south:political_wave_1 
##                                                         18 
##                                          W:party_id_wave_1 
##                                                         19 
##         substantive_ideology_scale_wave_1:percent_co_party 
##                                                         19 
##                    percent_co_party:strong_partisan_wave_1 
##                                                         19 
##      substantive_ideology_scale_wave_1:freq_twitter_wave_1 
##                                                         20 
##          substantive_ideology_scale_wave_1:party_id_wave_1 
##                                                         20 
##                substantive_ideology_scale_wave_1:education 
##                                                         21 
##                                                (Intercept) 
##                                                         22 
##                          substantive_ideology_scale_wave_1 
##                                                         22 
##                                            party_id_wave_1 
##                                                         22 
##   substantive_ideology_scale_wave_1:strong_partisan_wave_1 
##                                                         22 
##                     strong_partisan_wave_1:party_id_wave_1 
##                                                         22 
##                        freq_twitter_wave_1:party_id_wave_1 
##                                                         22 
##                          substantive_ideology_scale_wave_1 
##                                                         22

df.lm <- data.frame(cbind(df.t.mod,df$Y,df$W,df$bin_maker))

# change non-treated values back to 0
df.lm$W = (df.t$W+1)/2
colnames(df.lm) <- c(colnames(df.t.mod),"Y","W","bin_maker")

democrats <- df.lm[df$party_id_wave_1 == 1,]
republicans <- df.lm[df$party_id_wave_1 == 2,]

# Qualitatively pick from vars.t and the sort function above
#Republicans
republican_ITT_int_model<-lm(Y~
                           #treatment assignment variable
                           W+
                           #pre-treatment ideology score
                           substantive_ideology_scale_wave_1+
                           #% of people followed on Twitter from same party
                           percent_co_party+
                           #demographics
                           birth_year +
                           family_income+
                           education+
                           northeast+
                           south+
                           family_income:northeast+
                           percent_co_party:birth_year+ 
                           # instead of factor, we broke bins into vars
                           substantive_ideology_scale_wave_1:education+
                           substantive_ideology_scale_wave_1:political_wave_1+
                           political_wave_1+
                           south+
                           south:political_wave_1+
                           strong_partisan_wave_1+
                           freq_twitter_wave_1,
                         data=republicans)
coefficients<-data.frame(summary(republican_ITT_int_model)$coefficients)
library(pander)
panderOptions('digits',3)
panderOptions('table.split.table', 300)
set.caption(tab_title("Intent-to-Treat Interactions Model (Republicans)"))
pander(coefficients)

Table 1: Intent-to-Treat Interactions Model (Republicans)
	Estimate	Std..Error	t.value	Pr…t..
(Intercept)	3.37	0.144	23.5	4.83e-78
W	0.0913	0.0454	2.01	0.0449
substantive_ideology_scale_wave_1	1.42	0.108	13.2	1.25e-33
percent_co_party	0.0374	0.0255	1.47	0.143
birth_year	0.00999	0.0308	0.324	0.746
family_income	0.00875	0.0272	0.322	0.748
education	-0.042	0.0386	-1.09	0.277
northeast	0.0231	0.0648	0.356	0.722
south	-0.0134	0.135	-0.0993	0.921
political_wave_1	-0.00323	0.0906	-0.0356	0.972
strong_partisan_wave_1	0.169	0.0477	3.55	0.000433
freq_twitter_wave_1	0.0989	0.0451	2.19	0.0291
family_income:northeast	-0.0828	0.0541	-1.53	0.127
percent_co_party:birth_year	0.0164	0.0246	0.668	0.504
substantive_ideology_scale_wave_1:education	0.0513	0.033	1.56	0.121
substantive_ideology_scale_wave_1:political_wave_1	-0.136	0.0806	-1.69	0.0927
south:political_wave_1	0.000248	0.0964	0.00258	0.998

#Democrats
democrat_ITT_int_model<-lm(Y~
                            #treatment assignment variable
                             W+
                             #pre-treatment ideology score
                             substantive_ideology_scale_wave_1+
                             #% of people followed on Twitter from same party
                             percent_co_party+
                             #demographics
                             birth_year +
                             family_income+
                             education+
                             northeast+
                             south+
                             family_income:northeast+
                             percent_co_party:birth_year+ 
                             # instead of factor, we broke bins into vars
                             substantive_ideology_scale_wave_1:education+
                             substantive_ideology_scale_wave_1:political_wave_1+
                             political_wave_1+
                             south+
                             south:political_wave_1+
                             strong_partisan_wave_1+
                             freq_twitter_wave_1,
                       data=democrats)
coefficients<-data.frame(summary(democrat_ITT_int_model)$coefficients)
library(pander)
panderOptions('digits',3)
panderOptions('table.split.table', 300)
set.caption(tab_title("Intent-to-Treat Interactions Model (Democrats)"))
pander(coefficients)

Table 2: Intent-to-Treat Interactions Model (Democrats)
	Estimate	Std..Error	t.value	Pr…t..
(Intercept)	3.02	0.108	27.9	1.33e-109
W	-0.0166	0.0325	-0.512	0.609
substantive_ideology_scale_wave_1	1.38	0.0994	13.9	5.26e-38
percent_co_party	-0.00479	0.0239	-0.2	0.841
birth_year	0.0181	0.0216	0.836	0.404
family_income	0.00607	0.0194	0.313	0.754
education	0.0469	0.0251	1.87	0.0616
northeast	-0.0442	0.0429	-1.03	0.303
south	0.309	0.107	2.89	0.00395
political_wave_1	0.242	0.0638	3.79	0.000164
strong_partisan_wave_1	-0.0963	0.0374	-2.58	0.0102
freq_twitter_wave_1	0.0066	0.0337	0.196	0.845
family_income:northeast	-0.0797	0.042	-1.9	0.0582
percent_co_party:birth_year	-0.0601	0.0246	-2.44	0.0148
substantive_ideology_scale_wave_1:education	0.0518	0.0289	1.79	0.0734
substantive_ideology_scale_wave_1:political_wave_1	-0.108	0.0777	-1.39	0.164
south:political_wave_1	-0.27	0.084	-3.21	0.00139

4. CACE

Complier Average Causal Effects are calculated using the Lasso model (to select covariates) and the Two Stage Least Squares model.

CACE set up

Lasso Democrats

###### LASSO FOR DEMOCRATS

# try LASSO model to select instruments
library(glmnet)

# create hyperparameter grid
hyper_grid <- expand.grid(
  alpha = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
  nfolds = c(5,10)
)

# create nonzero results grid

non_zero.x <- matrix(nrow = ncol(democrats),ncol = 22)

set.seed(1)
for(i in 1:nrow(hyper_grid)){
  lasso.fit.x <- cv.glmnet(democrats, df$Y[dems], weights = df$weights[dems], 
                         lambda = seq(0.001,0.1,by = 0.001),
                         alpha = hyper_grid$alpha[i], 
                         nfolds = hyper_grid$nfolds[i])
  
  coef.x <- coef(lasso.fit.x, s = "lambda.min")
  coef.x[which(coef.x != 0)]
  vars.x <- coef.x@Dimnames[[1]][which(coef.x != 0 ) ]
  #print(vars.x)   ### should do cool graph here
  non_zero.x[,i] <- colnames(democrats) %in% vars.x
}

# later examine number of times each covariate shows up in models
## now we decide which variables to add
rownames(non_zero.x) <- colnames(df.x.mod)
sort(apply(non_zero.x,1,sum))

##                                            party_id_wave_1 
##                                                          0 
##                                    northeast:north_central 
##                                                          0 
##                                            northeast:south 
##                                                          0 
##                                           percent_co_party 
##                                                          2 
##                                       friends_count_wave_1 
##                                                          2 
##                                                 birth_year 
##                                                          2 
##                                                  education 
##                                                          2 
##                                   ideo_homogeneity_offline 
##                                                          2 
##                                                  northeast 
##                                                          2 
##                                              north_central 
##                                                          2 
##                                                      south 
##                                                          2 
##                                        freq_twitter_wave_1 
##                                                          2 
##         substantive_ideology_scale_wave_1:percent_co_party 
##                                                          2 
##     substantive_ideology_scale_wave_1:friends_count_wave_1 
##                                                          2 
##            substantive_ideology_scale_wave_1:family_income 
##                                                          2 
##   substantive_ideology_scale_wave_1:strong_partisan_wave_1 
##                                                          2 
##                                 percent_co_party:education 
##                                                          2 
##                                    percent_co_party:gender 
##                                                          2 
##                                 percent_co_party:northeast 
##                                                          2 
##                       percent_co_party:freq_twitter_wave_1 
##                                                          2 
##                           percent_co_party:party_id_wave_1 
##                                                          2 
##                          percent_co_party:political_wave_1 
##                                                          2 
##                            friends_count_wave_1:birth_year 
##                                                          2 
##                         friends_count_wave_1:family_income 
##                                                          2 
##                             friends_count_wave_1:education 
##                                                          2 
##              friends_count_wave_1:ideo_homogeneity_offline 
##                                                          2 
##                             friends_count_wave_1:northeast 
##                                                          2 
##                         friends_count_wave_1:north_central 
##                                                          2 
##                                 friends_count_wave_1:south 
##                                                          2 
##                       friends_count_wave_1:party_id_wave_1 
##                                                          2 
##                      friends_count_wave_1:political_wave_1 
##                                                          2 
##                                   birth_year:family_income 
##                                                          2 
##                                       birth_year:northeast 
##                                                          2 
##                                           birth_year:south 
##                                                          2 
##                          birth_year:strong_partisan_wave_1 
##                                                          2 
##                                 birth_year:party_id_wave_1 
##                                                          2 
##                                birth_year:political_wave_1 
##                                                          2 
##                                       family_income:gender 
##                                                          2 
##                                family_income:north_central 
##                                                          2 
##                       family_income:strong_partisan_wave_1 
##                                                          2 
##                          family_income:freq_twitter_wave_1 
##                                                          2 
##                             family_income:political_wave_1 
##                                                          2 
##                                           education:gender 
##                                                          2 
##                         education:ideo_homogeneity_offline 
##                                                          2 
##                                    education:north_central 
##                                                          2 
##                                            education:south 
##                                                          2 
##                              education:freq_twitter_wave_1 
##                                                          2 
##                                  education:party_id_wave_1 
##                                                          2 
##                                 education:political_wave_1 
##                                                          2 
##                            gender:ideo_homogeneity_offline 
##                                                          2 
##                                               gender:south 
##                                                          2 
##                              gender:strong_partisan_wave_1 
##                                                          2 
##                                 gender:freq_twitter_wave_1 
##                                                          2 
##                     ideo_homogeneity_offline:north_central 
##                                                          2 
##                             ideo_homogeneity_offline:south 
##                                                          2 
##               ideo_homogeneity_offline:freq_twitter_wave_1 
##                                                          2 
##                   ideo_homogeneity_offline:party_id_wave_1 
##                                                          2 
##                  ideo_homogeneity_offline:political_wave_1 
##                                                          2 
##                              northeast:freq_twitter_wave_1 
##                                                          2 
##                                  northeast:party_id_wave_1 
##                                                          2 
##                                 northeast:political_wave_1 
##                                                          2 
##                       north_central:strong_partisan_wave_1 
##                                                          2 
##                          north_central:freq_twitter_wave_1 
##                                                          2 
##                              north_central:party_id_wave_1 
##                                                          2 
##                             north_central:political_wave_1 
##                                                          2 
##                               south:strong_partisan_wave_1 
##                                                          2 
##                                      south:party_id_wave_1 
##                                                          2 
##                 strong_partisan_wave_1:freq_twitter_wave_1 
##                                                          2 
##                    strong_partisan_wave_1:political_wave_1 
##                                                          2 
##                        freq_twitter_wave_1:party_id_wave_1 
##                                                          2 
##                                     friends_count_wave_1.2 
##                                                          2 
##                                                     gender 
##                                                          4 
##               substantive_ideology_scale_wave_1:birth_year 
##                                                          4 
##                substantive_ideology_scale_wave_1:education 
##                                                          4 
## substantive_ideology_scale_wave_1:ideo_homogeneity_offline 
##                                                          4 
##            substantive_ideology_scale_wave_1:north_central 
##                                                          4 
##                    substantive_ideology_scale_wave_1:south 
##                                                          4 
##                      percent_co_party:friends_count_wave_1 
##                                                          4 
##                             percent_co_party:north_central 
##                                                          4 
##                                     percent_co_party:south 
##                                                          4 
##                    percent_co_party:strong_partisan_wave_1 
##                                                          4 
##                                friends_count_wave_1:gender 
##                                                          4 
##                   friends_count_wave_1:freq_twitter_wave_1 
##                                                          4 
##                                          birth_year:gender 
##                                                          4 
##                        birth_year:ideo_homogeneity_offline 
##                                                          4 
##                             birth_year:freq_twitter_wave_1 
##                                                          4 
##                                    family_income:education 
##                                                          4 
##                     family_income:ideo_homogeneity_offline 
##                                                          4 
##                                        family_income:south 
##                                                          4 
##                                        education:northeast 
##                                                          4 
##                           education:strong_partisan_wave_1 
##                                                          4 
##                                           gender:northeast 
##                                                          4 
##                                     gender:party_id_wave_1 
##                                                          4 
##                                        north_central:south 
##                                                          4 
##                                         percent_co_party.2 
##                                                          4 
##                                               birth_year.2 
##                                                          4 
##                                              family_income 
##                                                          6 
##         substantive_ideology_scale_wave_1:political_wave_1 
##                                                          6 
##                friends_count_wave_1:strong_partisan_wave_1 
##                                                          6 
##                                   birth_year:north_central 
##                                                          6 
##                                    family_income:northeast 
##                                                          6 
##                              family_income:party_id_wave_1 
##                                                          6 
##                         ideo_homogeneity_offline:northeast 
##                                                          6 
##            ideo_homogeneity_offline:strong_partisan_wave_1 
##                                                          6 
##                                  south:freq_twitter_wave_1 
##                                                          6 
##                       freq_twitter_wave_1:political_wave_1 
##                                                          6 
##                                            family_income.2 
##                                                          6 
##                                       gender:north_central 
##                                                         11 
##                   substantive_ideology_scale_wave_1:gender 
##                                                         12 
##                             percent_co_party:family_income 
##                                                         12 
##                                       birth_year:education 
##                                                         12 
##                                                education.2 
##                                                         12 
##      substantive_ideology_scale_wave_1:freq_twitter_wave_1 
##                                                         14 
##                           northeast:strong_partisan_wave_1 
##                                                         15 
##                  percent_co_party:ideo_homogeneity_offline 
##                                                         18 
##                           party_id_wave_1:political_wave_1 
##                                                         20 
##                                                (Intercept) 
##                                                         22 
##                          substantive_ideology_scale_wave_1 
##                                                         22 
##                                     strong_partisan_wave_1 
##                                                         22 
##                                           political_wave_1 
##                                                         22 
##                substantive_ideology_scale_wave_1:northeast 
##                                                         22 
##          substantive_ideology_scale_wave_1:party_id_wave_1 
##                                                         22 
##                                percent_co_party:birth_year 
##                                                         22 
##                                    gender:political_wave_1 
##                                                         22 
##                                     south:political_wave_1 
##                                                         22 
##                     strong_partisan_wave_1:party_id_wave_1 
##                                                         22 
##                                 ideo_homogeneity_offline.2 
##                                                         22 
##                        substantive_ideology_scale_wave_1.2 
##                                                         22

print(vars.x)

##  [1] "(Intercept)"                                      
##  [2] "substantive_ideology_scale_wave_1"                
##  [3] "strong_partisan_wave_1"                           
##  [4] "political_wave_1"                                 
##  [5] "substantive_ideology_scale_wave_1:northeast"      
##  [6] "substantive_ideology_scale_wave_1:party_id_wave_1"
##  [7] "percent_co_party:birth_year"                      
##  [8] "percent_co_party:family_income"                   
##  [9] "percent_co_party:ideo_homogeneity_offline"        
## [10] "birth_year:education"                             
## [11] "gender:north_central"                             
## [12] "gender:political_wave_1"                          
## [13] "northeast:strong_partisan_wave_1"                 
## [14] "south:political_wave_1"                           
## [15] "strong_partisan_wave_1:party_id_wave_1"           
## [16] "ideo_homogeneity_offline.2"                       
## [17] "education.2"                                      
## [18] "substantive_ideology_scale_wave_1.2"

to_include_dems <- c(vars.x[-1]) # can add variables of importance here

Lasso Republicans

###### LASSO FOR REPUBLICANS

# try LASSO model to select instruments
library(glmnet)

# create hyperparameter grid
hyper_grid <- expand.grid(
  alpha = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
  nfolds = c(5,10)
)

# create nonzero results grid

non_zero.x <- matrix(nrow = ncol(republicans),ncol = 22)

set.seed(1)
for(i in 1:nrow(hyper_grid)){
  lasso.fit.x <- cv.glmnet(republicans, df$Y[gop], weights = df$weights[gop], 
                           lambda = seq(0.001,0.1,by = 0.001),
                           alpha = hyper_grid$alpha[i], 
                           nfolds = hyper_grid$nfolds[i])
  
  coef.x <- coef(lasso.fit.x, s = "lambda.min")
  coef.x[which(coef.x != 0)]
  vars.x <- coef.x@Dimnames[[1]][which(coef.x != 0 ) ]
  print(vars.x)   ### should do cool graph here
  non_zero.x[,i] <- colnames(democrats) %in% vars.x
}

##   [1] "(Intercept)"                                               
##   [2] "substantive_ideology_scale_wave_1"                         
##   [3] "percent_co_party"                                          
##   [4] "friends_count_wave_1"                                      
##   [5] "birth_year"                                                
##   [6] "family_income"                                             
##   [7] "education"                                                 
##   [8] "gender"                                                    
##   [9] "ideo_homogeneity_offline"                                  
##  [10] "northeast"                                                 
##  [11] "north_central"                                             
##  [12] "south"                                                     
##  [13] "strong_partisan_wave_1"                                    
##  [14] "freq_twitter_wave_1"                                       
##  [15] "political_wave_1"                                          
##  [16] "substantive_ideology_scale_wave_1:percent_co_party"        
##  [17] "substantive_ideology_scale_wave_1:friends_count_wave_1"    
##  [18] "substantive_ideology_scale_wave_1:birth_year"              
##  [19] "substantive_ideology_scale_wave_1:family_income"           
##  [20] "substantive_ideology_scale_wave_1:education"               
##  [21] "substantive_ideology_scale_wave_1:gender"                  
##  [22] "substantive_ideology_scale_wave_1:ideo_homogeneity_offline"
##  [23] "substantive_ideology_scale_wave_1:northeast"               
##  [24] "substantive_ideology_scale_wave_1:north_central"           
##  [25] "substantive_ideology_scale_wave_1:south"                   
##  [26] "substantive_ideology_scale_wave_1:strong_partisan_wave_1"  
##  [27] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"     
##  [28] "substantive_ideology_scale_wave_1:party_id_wave_1"         
##  [29] "substantive_ideology_scale_wave_1:political_wave_1"        
##  [30] "percent_co_party:friends_count_wave_1"                     
##  [31] "percent_co_party:birth_year"                               
##  [32] "percent_co_party:family_income"                            
##  [33] "percent_co_party:education"                                
##  [34] "percent_co_party:gender"                                   
##  [35] "percent_co_party:ideo_homogeneity_offline"                 
##  [36] "percent_co_party:northeast"                                
##  [37] "percent_co_party:north_central"                            
##  [38] "percent_co_party:south"                                    
##  [39] "percent_co_party:strong_partisan_wave_1"                   
##  [40] "percent_co_party:freq_twitter_wave_1"                      
##  [41] "percent_co_party:party_id_wave_1"                          
##  [42] "percent_co_party:political_wave_1"                         
##  [43] "friends_count_wave_1:birth_year"                           
##  [44] "friends_count_wave_1:family_income"                        
##  [45] "friends_count_wave_1:education"                            
##  [46] "friends_count_wave_1:gender"                               
##  [47] "friends_count_wave_1:ideo_homogeneity_offline"             
##  [48] "friends_count_wave_1:northeast"                            
##  [49] "friends_count_wave_1:north_central"                        
##  [50] "friends_count_wave_1:south"                                
##  [51] "friends_count_wave_1:strong_partisan_wave_1"               
##  [52] "friends_count_wave_1:freq_twitter_wave_1"                  
##  [53] "friends_count_wave_1:party_id_wave_1"                      
##  [54] "friends_count_wave_1:political_wave_1"                     
##  [55] "birth_year:family_income"                                  
##  [56] "birth_year:education"                                      
##  [57] "birth_year:gender"                                         
##  [58] "birth_year:ideo_homogeneity_offline"                       
##  [59] "birth_year:northeast"                                      
##  [60] "birth_year:north_central"                                  
##  [61] "birth_year:south"                                          
##  [62] "birth_year:strong_partisan_wave_1"                         
##  [63] "birth_year:freq_twitter_wave_1"                            
##  [64] "birth_year:party_id_wave_1"                                
##  [65] "birth_year:political_wave_1"                               
##  [66] "family_income:education"                                   
##  [67] "family_income:gender"                                      
##  [68] "family_income:ideo_homogeneity_offline"                    
##  [69] "family_income:northeast"                                   
##  [70] "family_income:north_central"                               
##  [71] "family_income:south"                                       
##  [72] "family_income:strong_partisan_wave_1"                      
##  [73] "family_income:freq_twitter_wave_1"                         
##  [74] "family_income:party_id_wave_1"                             
##  [75] "family_income:political_wave_1"                            
##  [76] "education:gender"                                          
##  [77] "education:ideo_homogeneity_offline"                        
##  [78] "education:northeast"                                       
##  [79] "education:north_central"                                   
##  [80] "education:south"                                           
##  [81] "education:strong_partisan_wave_1"                          
##  [82] "education:freq_twitter_wave_1"                             
##  [83] "education:party_id_wave_1"                                 
##  [84] "education:political_wave_1"                                
##  [85] "gender:ideo_homogeneity_offline"                           
##  [86] "gender:northeast"                                          
##  [87] "gender:north_central"                                      
##  [88] "gender:south"                                              
##  [89] "gender:strong_partisan_wave_1"                             
##  [90] "gender:freq_twitter_wave_1"                                
##  [91] "gender:party_id_wave_1"                                    
##  [92] "gender:political_wave_1"                                   
##  [93] "ideo_homogeneity_offline:northeast"                        
##  [94] "ideo_homogeneity_offline:north_central"                    
##  [95] "ideo_homogeneity_offline:south"                            
##  [96] "ideo_homogeneity_offline:strong_partisan_wave_1"           
##  [97] "ideo_homogeneity_offline:freq_twitter_wave_1"              
##  [98] "ideo_homogeneity_offline:party_id_wave_1"                  
##  [99] "ideo_homogeneity_offline:political_wave_1"                 
## [100] "northeast:strong_partisan_wave_1"                          
## [101] "northeast:freq_twitter_wave_1"                             
## [102] "northeast:party_id_wave_1"                                 
## [103] "northeast:political_wave_1"                                
## [104] "north_central:south"                                       
## [105] "north_central:strong_partisan_wave_1"                      
## [106] "north_central:freq_twitter_wave_1"                         
## [107] "north_central:party_id_wave_1"                             
## [108] "north_central:political_wave_1"                            
## [109] "south:strong_partisan_wave_1"                              
## [110] "south:freq_twitter_wave_1"                                 
## [111] "south:party_id_wave_1"                                     
## [112] "south:political_wave_1"                                    
## [113] "strong_partisan_wave_1:freq_twitter_wave_1"                
## [114] "strong_partisan_wave_1:party_id_wave_1"                    
## [115] "strong_partisan_wave_1:political_wave_1"                   
## [116] "freq_twitter_wave_1:party_id_wave_1"                       
## [117] "freq_twitter_wave_1:political_wave_1"                      
## [118] "party_id_wave_1:political_wave_1"                          
## [119] "percent_co_party.2"                                        
## [120] "ideo_homogeneity_offline.2"                                
## [121] "friends_count_wave_1.2"                                    
## [122] "education.2"                                               
## [123] "family_income.2"                                           
## [124] "birth_year.2"                                              
## [125] "substantive_ideology_scale_wave_1.2"                       
##  [1] "(Intercept)"                                               
##  [2] "substantive_ideology_scale_wave_1"                         
##  [3] "percent_co_party"                                          
##  [4] "strong_partisan_wave_1"                                    
##  [5] "freq_twitter_wave_1"                                       
##  [6] "political_wave_1"                                          
##  [7] "substantive_ideology_scale_wave_1:family_income"           
##  [8] "substantive_ideology_scale_wave_1:education"               
##  [9] "substantive_ideology_scale_wave_1:gender"                  
## [10] "substantive_ideology_scale_wave_1:ideo_homogeneity_offline"
## [11] "substantive_ideology_scale_wave_1:south"                   
## [12] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"     
## [13] "substantive_ideology_scale_wave_1:party_id_wave_1"         
## [14] "substantive_ideology_scale_wave_1:political_wave_1"        
## [15] "percent_co_party:friends_count_wave_1"                     
## [16] "percent_co_party:education"                                
## [17] "percent_co_party:gender"                                   
## [18] "percent_co_party:ideo_homogeneity_offline"                 
## [19] "percent_co_party:north_central"                            
## [20] "percent_co_party:party_id_wave_1"                          
## [21] "percent_co_party:political_wave_1"                         
## [22] "friends_count_wave_1:family_income"                        
## [23] "friends_count_wave_1:strong_partisan_wave_1"               
## [24] "birth_year:northeast"                                      
## [25] "birth_year:south"                                          
## [26] "birth_year:strong_partisan_wave_1"                         
## [27] "birth_year:political_wave_1"                               
## [28] "family_income:education"                                   
## [29] "family_income:gender"                                      
## [30] "family_income:ideo_homogeneity_offline"                    
## [31] "family_income:northeast"                                   
## [32] "family_income:north_central"                               
## [33] "family_income:strong_partisan_wave_1"                      
## [34] "family_income:freq_twitter_wave_1"                         
## [35] "education:northeast"                                       
## [36] "education:north_central"                                   
## [37] "education:south"                                           
## [38] "education:strong_partisan_wave_1"                          
## [39] "gender:south"                                              
## [40] "gender:political_wave_1"                                   
## [41] "ideo_homogeneity_offline:north_central"                    
## [42] "ideo_homogeneity_offline:strong_partisan_wave_1"           
## [43] "northeast:strong_partisan_wave_1"                          
## [44] "north_central:south"                                       
## [45] "south:strong_partisan_wave_1"                              
## [46] "south:freq_twitter_wave_1"                                 
## [47] "south:political_wave_1"                                    
## [48] "strong_partisan_wave_1:freq_twitter_wave_1"                
## [49] "strong_partisan_wave_1:party_id_wave_1"                    
## [50] "strong_partisan_wave_1:political_wave_1"                   
## [51] "freq_twitter_wave_1:party_id_wave_1"                       
## [52] "party_id_wave_1:political_wave_1"                          
## [53] "substantive_ideology_scale_wave_1.2"                       
##  [1] "(Intercept)"                                          
##  [2] "substantive_ideology_scale_wave_1"                    
##  [3] "percent_co_party"                                     
##  [4] "strong_partisan_wave_1"                               
##  [5] "political_wave_1"                                     
##  [6] "substantive_ideology_scale_wave_1:family_income"      
##  [7] "substantive_ideology_scale_wave_1:education"          
##  [8] "substantive_ideology_scale_wave_1:gender"             
##  [9] "substantive_ideology_scale_wave_1:south"              
## [10] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [11] "substantive_ideology_scale_wave_1:party_id_wave_1"    
## [12] "percent_co_party:education"                           
## [13] "percent_co_party:north_central"                       
## [14] "percent_co_party:south"                               
## [15] "percent_co_party:party_id_wave_1"                     
## [16] "percent_co_party:political_wave_1"                    
## [17] "birth_year:northeast"                                 
## [18] "birth_year:political_wave_1"                          
## [19] "family_income:education"                              
## [20] "family_income:gender"                                 
## [21] "family_income:ideo_homogeneity_offline"               
## [22] "family_income:northeast"                              
## [23] "family_income:freq_twitter_wave_1"                    
## [24] "education:northeast"                                  
## [25] "gender:south"                                         
## [26] "gender:political_wave_1"                              
## [27] "northeast:strong_partisan_wave_1"                     
## [28] "strong_partisan_wave_1:freq_twitter_wave_1"           
## [29] "strong_partisan_wave_1:party_id_wave_1"               
## [30] "party_id_wave_1:political_wave_1"                     
## [31] "substantive_ideology_scale_wave_1.2"                  
##  [1] "(Intercept)"                                          
##  [2] "substantive_ideology_scale_wave_1"                    
##  [3] "percent_co_party"                                     
##  [4] "strong_partisan_wave_1"                               
##  [5] "political_wave_1"                                     
##  [6] "substantive_ideology_scale_wave_1:family_income"      
##  [7] "substantive_ideology_scale_wave_1:education"          
##  [8] "substantive_ideology_scale_wave_1:gender"             
##  [9] "substantive_ideology_scale_wave_1:south"              
## [10] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [11] "substantive_ideology_scale_wave_1:party_id_wave_1"    
## [12] "percent_co_party:south"                               
## [13] "percent_co_party:party_id_wave_1"                     
## [14] "percent_co_party:political_wave_1"                    
## [15] "birth_year:northeast"                                 
## [16] "birth_year:political_wave_1"                          
## [17] "family_income:ideo_homogeneity_offline"               
## [18] "family_income:northeast"                              
## [19] "northeast:strong_partisan_wave_1"                     
## [20] "strong_partisan_wave_1:freq_twitter_wave_1"           
## [21] "strong_partisan_wave_1:party_id_wave_1"               
## [22] "party_id_wave_1:political_wave_1"                     
## [23] "substantive_ideology_scale_wave_1.2"                  
##  [1] "(Intercept)"                                          
##  [2] "substantive_ideology_scale_wave_1"                    
##  [3] "percent_co_party"                                     
##  [4] "strong_partisan_wave_1"                               
##  [5] "political_wave_1"                                     
##  [6] "substantive_ideology_scale_wave_1:family_income"      
##  [7] "substantive_ideology_scale_wave_1:education"          
##  [8] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
##  [9] "substantive_ideology_scale_wave_1:party_id_wave_1"    
## [10] "percent_co_party:south"                               
## [11] "percent_co_party:party_id_wave_1"                     
## [12] "percent_co_party:political_wave_1"                    
## [13] "birth_year:northeast"                                 
## [14] "birth_year:political_wave_1"                          
## [15] "family_income:ideo_homogeneity_offline"               
## [16] "family_income:northeast"                              
## [17] "northeast:strong_partisan_wave_1"                     
## [18] "strong_partisan_wave_1:freq_twitter_wave_1"           
## [19] "strong_partisan_wave_1:party_id_wave_1"               
## [20] "party_id_wave_1:political_wave_1"                     
##  [1] "(Intercept)"                                          
##  [2] "substantive_ideology_scale_wave_1"                    
##  [3] "percent_co_party"                                     
##  [4] "strong_partisan_wave_1"                               
##  [5] "political_wave_1"                                     
##  [6] "substantive_ideology_scale_wave_1:family_income"      
##  [7] "substantive_ideology_scale_wave_1:education"          
##  [8] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
##  [9] "substantive_ideology_scale_wave_1:party_id_wave_1"    
## [10] "percent_co_party:south"                               
## [11] "percent_co_party:party_id_wave_1"                     
## [12] "percent_co_party:political_wave_1"                    
## [13] "birth_year:northeast"                                 
## [14] "birth_year:political_wave_1"                          
## [15] "family_income:ideo_homogeneity_offline"               
## [16] "family_income:northeast"                              
## [17] "northeast:strong_partisan_wave_1"                     
## [18] "strong_partisan_wave_1:freq_twitter_wave_1"           
## [19] "party_id_wave_1:political_wave_1"                     
##  [1] "(Intercept)"                                          
##  [2] "substantive_ideology_scale_wave_1"                    
##  [3] "percent_co_party"                                     
##  [4] "political_wave_1"                                     
##  [5] "substantive_ideology_scale_wave_1:family_income"      
##  [6] "substantive_ideology_scale_wave_1:education"          
##  [7] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
##  [8] "substantive_ideology_scale_wave_1:party_id_wave_1"    
##  [9] "percent_co_party:south"                               
## [10] "percent_co_party:party_id_wave_1"                     
## [11] "percent_co_party:political_wave_1"                    
## [12] "birth_year:northeast"                                 
## [13] "birth_year:political_wave_1"                          
## [14] "family_income:ideo_homogeneity_offline"               
## [15] "northeast:strong_partisan_wave_1"                     
## [16] "strong_partisan_wave_1:freq_twitter_wave_1"           
## [17] "party_id_wave_1:political_wave_1"                     
##  [1] "(Intercept)"                                      
##  [2] "substantive_ideology_scale_wave_1"                
##  [3] "political_wave_1"                                 
##  [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
##  [5] "percent_co_party:south"                           
##  [6] "percent_co_party:political_wave_1"                
##  [7] "birth_year:northeast"                             
##  [8] "birth_year:political_wave_1"                      
##  [9] "strong_partisan_wave_1:freq_twitter_wave_1"       
## [10] "party_id_wave_1:political_wave_1"                 
## [1] "(Intercept)"                                      
## [2] "substantive_ideology_scale_wave_1"                
## [3] "political_wave_1"                                 
## [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [5] "percent_co_party:political_wave_1"                
## [6] "birth_year:northeast"                             
## [7] "birth_year:political_wave_1"                      
## [8] "strong_partisan_wave_1:freq_twitter_wave_1"       
## [9] "party_id_wave_1:political_wave_1"                 
## [1] "(Intercept)"                                      
## [2] "substantive_ideology_scale_wave_1"                
## [3] "political_wave_1"                                 
## [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [5] "percent_co_party:south"                           
## [6] "percent_co_party:political_wave_1"                
## [7] "birth_year:northeast"                             
## [8] "strong_partisan_wave_1:freq_twitter_wave_1"       
## [9] "party_id_wave_1:political_wave_1"                 
## [1] "(Intercept)"                                      
## [2] "substantive_ideology_scale_wave_1"                
## [3] "political_wave_1"                                 
## [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [5] "percent_co_party:south"                           
## [6] "percent_co_party:political_wave_1"                
## [7] "birth_year:northeast"                             
## [8] "strong_partisan_wave_1:freq_twitter_wave_1"       
## [9] "party_id_wave_1:political_wave_1"                 
##   [1] "(Intercept)"                                               
##   [2] "substantive_ideology_scale_wave_1"                         
##   [3] "percent_co_party"                                          
##   [4] "friends_count_wave_1"                                      
##   [5] "birth_year"                                                
##   [6] "family_income"                                             
##   [7] "education"                                                 
##   [8] "gender"                                                    
##   [9] "ideo_homogeneity_offline"                                  
##  [10] "northeast"                                                 
##  [11] "north_central"                                             
##  [12] "south"                                                     
##  [13] "strong_partisan_wave_1"                                    
##  [14] "freq_twitter_wave_1"                                       
##  [15] "political_wave_1"                                          
##  [16] "substantive_ideology_scale_wave_1:percent_co_party"        
##  [17] "substantive_ideology_scale_wave_1:friends_count_wave_1"    
##  [18] "substantive_ideology_scale_wave_1:birth_year"              
##  [19] "substantive_ideology_scale_wave_1:family_income"           
##  [20] "substantive_ideology_scale_wave_1:education"               
##  [21] "substantive_ideology_scale_wave_1:gender"                  
##  [22] "substantive_ideology_scale_wave_1:ideo_homogeneity_offline"
##  [23] "substantive_ideology_scale_wave_1:northeast"               
##  [24] "substantive_ideology_scale_wave_1:north_central"           
##  [25] "substantive_ideology_scale_wave_1:south"                   
##  [26] "substantive_ideology_scale_wave_1:strong_partisan_wave_1"  
##  [27] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"     
##  [28] "substantive_ideology_scale_wave_1:party_id_wave_1"         
##  [29] "substantive_ideology_scale_wave_1:political_wave_1"        
##  [30] "percent_co_party:friends_count_wave_1"                     
##  [31] "percent_co_party:birth_year"                               
##  [32] "percent_co_party:family_income"                            
##  [33] "percent_co_party:education"                                
##  [34] "percent_co_party:gender"                                   
##  [35] "percent_co_party:ideo_homogeneity_offline"                 
##  [36] "percent_co_party:northeast"                                
##  [37] "percent_co_party:north_central"                            
##  [38] "percent_co_party:south"                                    
##  [39] "percent_co_party:strong_partisan_wave_1"                   
##  [40] "percent_co_party:freq_twitter_wave_1"                      
##  [41] "percent_co_party:party_id_wave_1"                          
##  [42] "percent_co_party:political_wave_1"                         
##  [43] "friends_count_wave_1:birth_year"                           
##  [44] "friends_count_wave_1:family_income"                        
##  [45] "friends_count_wave_1:education"                            
##  [46] "friends_count_wave_1:gender"                               
##  [47] "friends_count_wave_1:ideo_homogeneity_offline"             
##  [48] "friends_count_wave_1:northeast"                            
##  [49] "friends_count_wave_1:north_central"                        
##  [50] "friends_count_wave_1:south"                                
##  [51] "friends_count_wave_1:strong_partisan_wave_1"               
##  [52] "friends_count_wave_1:freq_twitter_wave_1"                  
##  [53] "friends_count_wave_1:party_id_wave_1"                      
##  [54] "friends_count_wave_1:political_wave_1"                     
##  [55] "birth_year:family_income"                                  
##  [56] "birth_year:education"                                      
##  [57] "birth_year:gender"                                         
##  [58] "birth_year:ideo_homogeneity_offline"                       
##  [59] "birth_year:northeast"                                      
##  [60] "birth_year:north_central"                                  
##  [61] "birth_year:south"                                          
##  [62] "birth_year:strong_partisan_wave_1"                         
##  [63] "birth_year:freq_twitter_wave_1"                            
##  [64] "birth_year:party_id_wave_1"                                
##  [65] "birth_year:political_wave_1"                               
##  [66] "family_income:education"                                   
##  [67] "family_income:gender"                                      
##  [68] "family_income:ideo_homogeneity_offline"                    
##  [69] "family_income:northeast"                                   
##  [70] "family_income:north_central"                               
##  [71] "family_income:south"                                       
##  [72] "family_income:strong_partisan_wave_1"                      
##  [73] "family_income:freq_twitter_wave_1"                         
##  [74] "family_income:party_id_wave_1"                             
##  [75] "family_income:political_wave_1"                            
##  [76] "education:gender"                                          
##  [77] "education:ideo_homogeneity_offline"                        
##  [78] "education:northeast"                                       
##  [79] "education:north_central"                                   
##  [80] "education:south"                                           
##  [81] "education:strong_partisan_wave_1"                          
##  [82] "education:freq_twitter_wave_1"                             
##  [83] "education:party_id_wave_1"                                 
##  [84] "education:political_wave_1"                                
##  [85] "gender:ideo_homogeneity_offline"                           
##  [86] "gender:northeast"                                          
##  [87] "gender:north_central"                                      
##  [88] "gender:south"                                              
##  [89] "gender:strong_partisan_wave_1"                             
##  [90] "gender:freq_twitter_wave_1"                                
##  [91] "gender:party_id_wave_1"                                    
##  [92] "gender:political_wave_1"                                   
##  [93] "ideo_homogeneity_offline:northeast"                        
##  [94] "ideo_homogeneity_offline:north_central"                    
##  [95] "ideo_homogeneity_offline:south"                            
##  [96] "ideo_homogeneity_offline:strong_partisan_wave_1"           
##  [97] "ideo_homogeneity_offline:freq_twitter_wave_1"              
##  [98] "ideo_homogeneity_offline:party_id_wave_1"                  
##  [99] "ideo_homogeneity_offline:political_wave_1"                 
## [100] "northeast:strong_partisan_wave_1"                          
## [101] "northeast:freq_twitter_wave_1"                             
## [102] "northeast:party_id_wave_1"                                 
## [103] "northeast:political_wave_1"                                
## [104] "north_central:south"                                       
## [105] "north_central:strong_partisan_wave_1"                      
## [106] "north_central:freq_twitter_wave_1"                         
## [107] "north_central:party_id_wave_1"                             
## [108] "north_central:political_wave_1"                            
## [109] "south:strong_partisan_wave_1"                              
## [110] "south:freq_twitter_wave_1"                                 
## [111] "south:party_id_wave_1"                                     
## [112] "south:political_wave_1"                                    
## [113] "strong_partisan_wave_1:freq_twitter_wave_1"                
## [114] "strong_partisan_wave_1:party_id_wave_1"                    
## [115] "strong_partisan_wave_1:political_wave_1"                   
## [116] "freq_twitter_wave_1:party_id_wave_1"                       
## [117] "freq_twitter_wave_1:political_wave_1"                      
## [118] "party_id_wave_1:political_wave_1"                          
## [119] "percent_co_party.2"                                        
## [120] "ideo_homogeneity_offline.2"                                
## [121] "friends_count_wave_1.2"                                    
## [122] "education.2"                                               
## [123] "family_income.2"                                           
## [124] "birth_year.2"                                              
## [125] "substantive_ideology_scale_wave_1.2"                       
##  [1] "(Intercept)"                                               
##  [2] "substantive_ideology_scale_wave_1"                         
##  [3] "percent_co_party"                                          
##  [4] "strong_partisan_wave_1"                                    
##  [5] "freq_twitter_wave_1"                                       
##  [6] "political_wave_1"                                          
##  [7] "substantive_ideology_scale_wave_1:family_income"           
##  [8] "substantive_ideology_scale_wave_1:education"               
##  [9] "substantive_ideology_scale_wave_1:gender"                  
## [10] "substantive_ideology_scale_wave_1:ideo_homogeneity_offline"
## [11] "substantive_ideology_scale_wave_1:south"                   
## [12] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"     
## [13] "substantive_ideology_scale_wave_1:party_id_wave_1"         
## [14] "substantive_ideology_scale_wave_1:political_wave_1"        
## [15] "percent_co_party:friends_count_wave_1"                     
## [16] "percent_co_party:education"                                
## [17] "percent_co_party:gender"                                   
## [18] "percent_co_party:ideo_homogeneity_offline"                 
## [19] "percent_co_party:north_central"                            
## [20] "percent_co_party:party_id_wave_1"                          
## [21] "percent_co_party:political_wave_1"                         
## [22] "friends_count_wave_1:family_income"                        
## [23] "friends_count_wave_1:strong_partisan_wave_1"               
## [24] "birth_year:northeast"                                      
## [25] "birth_year:south"                                          
## [26] "birth_year:strong_partisan_wave_1"                         
## [27] "birth_year:political_wave_1"                               
## [28] "family_income:education"                                   
## [29] "family_income:gender"                                      
## [30] "family_income:ideo_homogeneity_offline"                    
## [31] "family_income:northeast"                                   
## [32] "family_income:north_central"                               
## [33] "family_income:strong_partisan_wave_1"                      
## [34] "family_income:freq_twitter_wave_1"                         
## [35] "education:northeast"                                       
## [36] "education:north_central"                                   
## [37] "education:south"                                           
## [38] "education:strong_partisan_wave_1"                          
## [39] "gender:south"                                              
## [40] "gender:political_wave_1"                                   
## [41] "ideo_homogeneity_offline:north_central"                    
## [42] "ideo_homogeneity_offline:strong_partisan_wave_1"           
## [43] "northeast:strong_partisan_wave_1"                          
## [44] "north_central:south"                                       
## [45] "south:strong_partisan_wave_1"                              
## [46] "south:freq_twitter_wave_1"                                 
## [47] "south:political_wave_1"                                    
## [48] "strong_partisan_wave_1:freq_twitter_wave_1"                
## [49] "strong_partisan_wave_1:party_id_wave_1"                    
## [50] "strong_partisan_wave_1:political_wave_1"                   
## [51] "freq_twitter_wave_1:party_id_wave_1"                       
## [52] "party_id_wave_1:political_wave_1"                          
## [53] "substantive_ideology_scale_wave_1.2"                       
##  [1] "(Intercept)"                                          
##  [2] "substantive_ideology_scale_wave_1"                    
##  [3] "percent_co_party"                                     
##  [4] "strong_partisan_wave_1"                               
##  [5] "political_wave_1"                                     
##  [6] "substantive_ideology_scale_wave_1:family_income"      
##  [7] "substantive_ideology_scale_wave_1:education"          
##  [8] "substantive_ideology_scale_wave_1:gender"             
##  [9] "substantive_ideology_scale_wave_1:south"              
## [10] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [11] "substantive_ideology_scale_wave_1:party_id_wave_1"    
## [12] "percent_co_party:education"                           
## [13] "percent_co_party:north_central"                       
## [14] "percent_co_party:south"                               
## [15] "percent_co_party:party_id_wave_1"                     
## [16] "percent_co_party:political_wave_1"                    
## [17] "birth_year:northeast"                                 
## [18] "birth_year:political_wave_1"                          
## [19] "family_income:education"                              
## [20] "family_income:gender"                                 
## [21] "family_income:ideo_homogeneity_offline"               
## [22] "family_income:northeast"                              
## [23] "family_income:freq_twitter_wave_1"                    
## [24] "education:northeast"                                  
## [25] "gender:south"                                         
## [26] "gender:political_wave_1"                              
## [27] "northeast:strong_partisan_wave_1"                     
## [28] "strong_partisan_wave_1:freq_twitter_wave_1"           
## [29] "strong_partisan_wave_1:party_id_wave_1"               
## [30] "party_id_wave_1:political_wave_1"                     
## [31] "substantive_ideology_scale_wave_1.2"                  
##  [1] "(Intercept)"                                          
##  [2] "substantive_ideology_scale_wave_1"                    
##  [3] "percent_co_party"                                     
##  [4] "strong_partisan_wave_1"                               
##  [5] "political_wave_1"                                     
##  [6] "substantive_ideology_scale_wave_1:family_income"      
##  [7] "substantive_ideology_scale_wave_1:education"          
##  [8] "substantive_ideology_scale_wave_1:south"              
##  [9] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [10] "substantive_ideology_scale_wave_1:party_id_wave_1"    
## [11] "percent_co_party:south"                               
## [12] "percent_co_party:party_id_wave_1"                     
## [13] "percent_co_party:political_wave_1"                    
## [14] "birth_year:northeast"                                 
## [15] "birth_year:political_wave_1"                          
## [16] "family_income:education"                              
## [17] "family_income:ideo_homogeneity_offline"               
## [18] "family_income:northeast"                              
## [19] "family_income:freq_twitter_wave_1"                    
## [20] "education:northeast"                                  
## [21] "northeast:strong_partisan_wave_1"                     
## [22] "strong_partisan_wave_1:freq_twitter_wave_1"           
## [23] "strong_partisan_wave_1:party_id_wave_1"               
## [24] "party_id_wave_1:political_wave_1"                     
##  [1] "(Intercept)"                                          
##  [2] "substantive_ideology_scale_wave_1"                    
##  [3] "percent_co_party"                                     
##  [4] "strong_partisan_wave_1"                               
##  [5] "political_wave_1"                                     
##  [6] "substantive_ideology_scale_wave_1:family_income"      
##  [7] "substantive_ideology_scale_wave_1:education"          
##  [8] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
##  [9] "substantive_ideology_scale_wave_1:party_id_wave_1"    
## [10] "percent_co_party:south"                               
## [11] "percent_co_party:party_id_wave_1"                     
## [12] "percent_co_party:political_wave_1"                    
## [13] "birth_year:northeast"                                 
## [14] "birth_year:political_wave_1"                          
## [15] "family_income:ideo_homogeneity_offline"               
## [16] "northeast:strong_partisan_wave_1"                     
## [17] "strong_partisan_wave_1:freq_twitter_wave_1"           
## [18] "strong_partisan_wave_1:party_id_wave_1"               
## [19] "party_id_wave_1:political_wave_1"                     
##  [1] "(Intercept)"                                          
##  [2] "substantive_ideology_scale_wave_1"                    
##  [3] "percent_co_party"                                     
##  [4] "strong_partisan_wave_1"                               
##  [5] "political_wave_1"                                     
##  [6] "substantive_ideology_scale_wave_1:family_income"      
##  [7] "substantive_ideology_scale_wave_1:education"          
##  [8] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
##  [9] "substantive_ideology_scale_wave_1:party_id_wave_1"    
## [10] "percent_co_party:south"                               
## [11] "percent_co_party:party_id_wave_1"                     
## [12] "percent_co_party:political_wave_1"                    
## [13] "birth_year:northeast"                                 
## [14] "birth_year:political_wave_1"                          
## [15] "family_income:ideo_homogeneity_offline"               
## [16] "strong_partisan_wave_1:freq_twitter_wave_1"           
## [17] "strong_partisan_wave_1:party_id_wave_1"               
## [18] "party_id_wave_1:political_wave_1"                     
##  [1] "(Intercept)"                                          
##  [2] "substantive_ideology_scale_wave_1"                    
##  [3] "percent_co_party"                                     
##  [4] "political_wave_1"                                     
##  [5] "substantive_ideology_scale_wave_1:family_income"      
##  [6] "substantive_ideology_scale_wave_1:education"          
##  [7] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
##  [8] "substantive_ideology_scale_wave_1:party_id_wave_1"    
##  [9] "percent_co_party:south"                               
## [10] "percent_co_party:party_id_wave_1"                     
## [11] "percent_co_party:political_wave_1"                    
## [12] "birth_year:northeast"                                 
## [13] "birth_year:political_wave_1"                          
## [14] "family_income:ideo_homogeneity_offline"               
## [15] "strong_partisan_wave_1:freq_twitter_wave_1"           
## [16] "party_id_wave_1:political_wave_1"                     
##  [1] "(Intercept)"                                      
##  [2] "substantive_ideology_scale_wave_1"                
##  [3] "percent_co_party"                                 
##  [4] "political_wave_1"                                 
##  [5] "substantive_ideology_scale_wave_1:education"      
##  [6] "substantive_ideology_scale_wave_1:party_id_wave_1"
##  [7] "percent_co_party:south"                           
##  [8] "percent_co_party:party_id_wave_1"                 
##  [9] "percent_co_party:political_wave_1"                
## [10] "birth_year:northeast"                             
## [11] "birth_year:political_wave_1"                      
## [12] "strong_partisan_wave_1:freq_twitter_wave_1"       
## [13] "party_id_wave_1:political_wave_1"                 
##  [1] "(Intercept)"                                      
##  [2] "substantive_ideology_scale_wave_1"                
##  [3] "political_wave_1"                                 
##  [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
##  [5] "percent_co_party:south"                           
##  [6] "percent_co_party:political_wave_1"                
##  [7] "birth_year:northeast"                             
##  [8] "birth_year:political_wave_1"                      
##  [9] "strong_partisan_wave_1:freq_twitter_wave_1"       
## [10] "party_id_wave_1:political_wave_1"                 
##  [1] "(Intercept)"                                      
##  [2] "substantive_ideology_scale_wave_1"                
##  [3] "political_wave_1"                                 
##  [4] "substantive_ideology_scale_wave_1:education"      
##  [5] "substantive_ideology_scale_wave_1:party_id_wave_1"
##  [6] "percent_co_party:south"                           
##  [7] "percent_co_party:political_wave_1"                
##  [8] "birth_year:northeast"                             
##  [9] "birth_year:political_wave_1"                      
## [10] "family_income:ideo_homogeneity_offline"           
## [11] "strong_partisan_wave_1:freq_twitter_wave_1"       
## [12] "party_id_wave_1:political_wave_1"                 
## [1] "(Intercept)"                                      
## [2] "substantive_ideology_scale_wave_1"                
## [3] "political_wave_1"                                 
## [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [5] "percent_co_party:political_wave_1"                
## [6] "strong_partisan_wave_1:freq_twitter_wave_1"       
## [7] "party_id_wave_1:political_wave_1"

# later examine number of times each covariate shows up in models
## now we decide which variables to add
rownames(non_zero.x) <- colnames(df.x.mod)
sort(apply(non_zero.x,1,sum))

##                                            party_id_wave_1 
##                                                          0 
##                                    northeast:north_central 
##                                                          0 
##                                            northeast:south 
##                                                          0 
##                                       friends_count_wave_1 
##                                                          2 
##                                                 birth_year 
##                                                          2 
##                                              family_income 
##                                                          2 
##                                                  education 
##                                                          2 
##                                                     gender 
##                                                          2 
##                                   ideo_homogeneity_offline 
##                                                          2 
##                                                  northeast 
##                                                          2 
##                                              north_central 
##                                                          2 
##                                                      south 
##                                                          2 
##         substantive_ideology_scale_wave_1:percent_co_party 
##                                                          2 
##     substantive_ideology_scale_wave_1:friends_count_wave_1 
##                                                          2 
##               substantive_ideology_scale_wave_1:birth_year 
##                                                          2 
##                substantive_ideology_scale_wave_1:northeast 
##                                                          2 
##            substantive_ideology_scale_wave_1:north_central 
##                                                          2 
##   substantive_ideology_scale_wave_1:strong_partisan_wave_1 
##                                                          2 
##                                percent_co_party:birth_year 
##                                                          2 
##                             percent_co_party:family_income 
##                                                          2 
##                                 percent_co_party:northeast 
##                                                          2 
##                    percent_co_party:strong_partisan_wave_1 
##                                                          2 
##                       percent_co_party:freq_twitter_wave_1 
##                                                          2 
##                            friends_count_wave_1:birth_year 
##                                                          2 
##                             friends_count_wave_1:education 
##                                                          2 
##                                friends_count_wave_1:gender 
##                                                          2 
##              friends_count_wave_1:ideo_homogeneity_offline 
##                                                          2 
##                             friends_count_wave_1:northeast 
##                                                          2 
##                         friends_count_wave_1:north_central 
##                                                          2 
##                                 friends_count_wave_1:south 
##                                                          2 
##                   friends_count_wave_1:freq_twitter_wave_1 
##                                                          2 
##                       friends_count_wave_1:party_id_wave_1 
##                                                          2 
##                      friends_count_wave_1:political_wave_1 
##                                                          2 
##                                   birth_year:family_income 
##                                                          2 
##                                       birth_year:education 
##                                                          2 
##                                          birth_year:gender 
##                                                          2 
##                        birth_year:ideo_homogeneity_offline 
##                                                          2 
##                                   birth_year:north_central 
##                                                          2 
##                             birth_year:freq_twitter_wave_1 
##                                                          2 
##                                 birth_year:party_id_wave_1 
##                                                          2 
##                                        family_income:south 
##                                                          2 
##                              family_income:party_id_wave_1 
##                                                          2 
##                             family_income:political_wave_1 
##                                                          2 
##                                           education:gender 
##                                                          2 
##                         education:ideo_homogeneity_offline 
##                                                          2 
##                              education:freq_twitter_wave_1 
##                                                          2 
##                                  education:party_id_wave_1 
##                                                          2 
##                                 education:political_wave_1 
##                                                          2 
##                            gender:ideo_homogeneity_offline 
##                                                          2 
##                                           gender:northeast 
##                                                          2 
##                                       gender:north_central 
##                                                          2 
##                              gender:strong_partisan_wave_1 
##                                                          2 
##                                 gender:freq_twitter_wave_1 
##                                                          2 
##                                     gender:party_id_wave_1 
##                                                          2 
##                         ideo_homogeneity_offline:northeast 
##                                                          2 
##                             ideo_homogeneity_offline:south 
##                                                          2 
##               ideo_homogeneity_offline:freq_twitter_wave_1 
##                                                          2 
##                   ideo_homogeneity_offline:party_id_wave_1 
##                                                          2 
##                  ideo_homogeneity_offline:political_wave_1 
##                                                          2 
##                              northeast:freq_twitter_wave_1 
##                                                          2 
##                                  northeast:party_id_wave_1 
##                                                          2 
##                                 northeast:political_wave_1 
##                                                          2 
##                       north_central:strong_partisan_wave_1 
##                                                          2 
##                          north_central:freq_twitter_wave_1 
##                                                          2 
##                              north_central:party_id_wave_1 
##                                                          2 
##                             north_central:political_wave_1 
##                                                          2 
##                                      south:party_id_wave_1 
##                                                          2 
##                       freq_twitter_wave_1:political_wave_1 
##                                                          2 
##                                         percent_co_party.2 
##                                                          2 
##                                 ideo_homogeneity_offline.2 
##                                                          2 
##                                     friends_count_wave_1.2 
##                                                          2 
##                                                education.2 
##                                                          2 
##                                            family_income.2 
##                                                          2 
##                                               birth_year.2 
##                                                          2 
##                                        freq_twitter_wave_1 
##                                                          4 
## substantive_ideology_scale_wave_1:ideo_homogeneity_offline 
##                                                          4 
##         substantive_ideology_scale_wave_1:political_wave_1 
##                                                          4 
##                      percent_co_party:friends_count_wave_1 
##                                                          4 
##                                    percent_co_party:gender 
##                                                          4 
##                  percent_co_party:ideo_homogeneity_offline 
##                                                          4 
##                         friends_count_wave_1:family_income 
##                                                          4 
##                friends_count_wave_1:strong_partisan_wave_1 
##                                                          4 
##                                           birth_year:south 
##                                                          4 
##                          birth_year:strong_partisan_wave_1 
##                                                          4 
##                                family_income:north_central 
##                                                          4 
##                       family_income:strong_partisan_wave_1 
##                                                          4 
##                                    education:north_central 
##                                                          4 
##                                            education:south 
##                                                          4 
##                           education:strong_partisan_wave_1 
##                                                          4 
##                     ideo_homogeneity_offline:north_central 
##                                                          4 
##            ideo_homogeneity_offline:strong_partisan_wave_1 
##                                                          4 
##                                        north_central:south 
##                                                          4 
##                               south:strong_partisan_wave_1 
##                                                          4 
##                                  south:freq_twitter_wave_1 
##                                                          4 
##                                     south:political_wave_1 
##                                                          4 
##                    strong_partisan_wave_1:political_wave_1 
##                                                          4 
##                        freq_twitter_wave_1:party_id_wave_1 
##                                                          4 
##                                 percent_co_party:education 
##                                                          6 
##                             percent_co_party:north_central 
##                                                          6 
##                                       family_income:gender 
##                                                          6 
##                                               gender:south 
##                                                          6 
##                                    gender:political_wave_1 
##                                                          6 
##                   substantive_ideology_scale_wave_1:gender 
##                                                          7 
##                                    family_income:education 
##                                                          7 
##                          family_income:freq_twitter_wave_1 
##                                                          7 
##                                        education:northeast 
##                                                          7 
##                        substantive_ideology_scale_wave_1.2 
##                                                          7 
##                    substantive_ideology_scale_wave_1:south 
##                                                          8 
##                                    family_income:northeast 
##                                                         10 
##                     strong_partisan_wave_1:party_id_wave_1 
##                                                         11 
##                                     strong_partisan_wave_1 
##                                                         12 
##                           northeast:strong_partisan_wave_1 
##                                                         12 
##            substantive_ideology_scale_wave_1:family_income 
##                                                         14 
##      substantive_ideology_scale_wave_1:freq_twitter_wave_1 
##                                                         14 
##                                           percent_co_party 
##                                                         15 
##                           percent_co_party:party_id_wave_1 
##                                                         15 
##                     family_income:ideo_homogeneity_offline 
##                                                         15 
##                substantive_ideology_scale_wave_1:education 
##                                                         16 
##                                     percent_co_party:south 
##                                                         18 
##                                birth_year:political_wave_1 
##                                                         19 
##                                       birth_year:northeast 
##                                                         21 
##                                                (Intercept) 
##                                                         22 
##                          substantive_ideology_scale_wave_1 
##                                                         22 
##                                           political_wave_1 
##                                                         22 
##          substantive_ideology_scale_wave_1:party_id_wave_1 
##                                                         22 
##                          percent_co_party:political_wave_1 
##                                                         22 
##                 strong_partisan_wave_1:freq_twitter_wave_1 
##                                                         22 
##                           party_id_wave_1:political_wave_1 
##                                                         22

#print(vars.x)
to_include_gop <- c(vars.x[-1]) # can add variables of importance here```

#Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

Two Stage Least Squares

## for 2SLS 

df.2sls <- data.frame(cbind(df.x.mod,df$Y,df$W,df$perfect_complier,
                            df$half_complier,df$bot_followers,df$bin_maker))

colnames(df.2sls) <- c(colnames(df.x.mod),"Y","W","perfect_complier",
                       "half_complier","bot_followers","bin_maker")

# include outcome and instrument variables now
democrats <- df.2sls[dems,]
republicans <- df.2sls[gop,]

datasets <- list(democrats,republicans)
to_include <- list(to_include_dems,to_include_gop)

# re-create functions to calculate CACE with vars from LASSO
# use vars[-1] to remove intercept
CACE_int_fc<-function(data, to_include){      ## *consider manually inputting main effects*
  results<-ivreg(paste("Y ~perfect_complier+",
                       paste(to_include, collapse = "+"), "+as.factor(bin_maker)| W +",
                       paste(to_include, collapse = "+"),"+as.factor(bin_maker)"),
                 data = data)
  #calculate cluster robust standard errors
  data$bin_maker<-as.numeric(data$bin_maker)
  output<-cluster.robust.se(results, data$bin_maker)[2,]
  return(output)}

CACE_int_hc<-function(data, to_include){       ## *consider manually inputting main effects*
  results<-ivreg(paste("Y ~half_complier+",
                       paste(to_include, collapse = "+"),"+as.factor(bin_maker)| W +",
                       paste(to_include, collapse = "+"),"+as.factor(bin_maker)"),
                 data = data)
  #calculate cluster robust standard errors
  data$bin_maker<-as.numeric(data$bin_maker)
  output<-cluster.robust.se(results, data$bin_maker)[2,]
  return(output)}

CACE_int_bf<-function(data, to_include){       ## *consider manually inputting main effects*
  results<-ivreg(paste("Y ~bot_followers+",
                       paste(to_include, collapse = "+"),"+as.factor(bin_maker)| W +",
                       paste(to_include, collapse = "+"),"+as.factor(bin_maker)"),
                 data = data)
  #calculate cluster robust standard errors
  data$bin_maker<-as.numeric(data$bin_maker)
  output<-cluster.robust.se(results, data$bin_maker)[2,]
  return(output)}

full_compliance_int_models <- mapply(CACE_int_fc,datasets,to_include)

## [1] "Cluster Robust Standard Errors"
## [1] "Cluster Robust Standard Errors"

half_compliance_int_models <- mapply(CACE_int_hc,datasets,to_include)

## [1] "Cluster Robust Standard Errors"
## [1] "Cluster Robust Standard Errors"

bot_follower_int_models <- mapply(CACE_int_bf,datasets,to_include)

## [1] "Cluster Robust Standard Errors"
## [1] "Cluster Robust Standard Errors"

CACE Results

republican_full_compliance_int_cace<-as.data.frame(t(full_compliance_int_models[,2]))
republican_full_compliance_int_cace$sample<-"republicans_full_compliance"
republican_full_compliance_int_cace$party<-"republicans"
names(republican_full_compliance_int_cace)<-c("estimate","se","t","p","sample","party")

republican_half_compliance_int_cace<-as.data.frame(t(half_compliance_int_models[,2]))
republican_half_compliance_int_cace$sample<-"republicans_half_compliance"
republican_half_compliance_int_cace$party<-"republicans"
names(republican_half_compliance_int_cace)<-c("estimate","se","t","p","sample","party")

republican_bot_follower_int_cace<-as.data.frame(t(bot_follower_int_models[,2]))
republican_bot_follower_int_cace$sample<-"republicans_bot_follower"
republican_bot_follower_int_cace$party<-"republicans"
names(republican_bot_follower_int_cace)<-c("estimate","se","t","p","sample","party")
#extract results for democrats
democrat_full_compliance_int_cace<-data.frame(t(full_compliance_int_models[,1]))
democrat_full_compliance_int_cace$sample<-"democrats_full_compliance"
democrat_full_compliance_int_cace$party<-"democrats"
names(democrat_full_compliance_int_cace)<-c("estimate","se","t","p","sample","party")

democrat_half_compliance_int_cace<-as.data.frame(t(half_compliance_int_models[,1]))
democrat_half_compliance_int_cace$sample<-"democrats_half_compliance"
democrat_half_compliance_int_cace$party<-"democrats"
names(democrat_half_compliance_int_cace)<-c("estimate","se","t","p","sample","party")

democrat_bot_follower_int_cace<-as.data.frame(t(bot_follower_int_models[,1]))
democrat_bot_follower_int_cace$sample<-"democrats_bot_follower"
democrat_bot_follower_int_cace$party<-"democrats"
names(democrat_bot_follower_int_cace)<-c("estimate","se","t","p","sample","party")

CACE Plot

#create another dataset that combines ITT and CACE results for plotting
republican_itt<-
  data.frame(t(summary(republican_ITT_int_model, cluster="bin_maker")$coefficients[2:2,]))
names(republican_itt)<-c("estimate","se","t","p")
republican_itt$sample<-"republicans_itt"
republican_itt$party<-"republicans"
democrat_itt<-
  data.frame(t(summary(democrat_ITT_int_model, cluster="bin_maker")$coefficients[2:2,]))
names(democrat_itt)<-c("estimate","se","t","p")
democrat_itt$sample<-"democrats_itt"
democrat_itt$party<-"democrats"
republican_int_plot<-rbind(republican_full_compliance_int_cace,
                           republican_half_compliance_int_cace,
                           republican_bot_follower_int_cace,
                           republican_itt)
democrat_int_plot<-rbind(democrat_full_compliance_int_cace,
                         democrat_half_compliance_int_cace,
                         democrat_bot_follower_int_cace,
                         democrat_itt)
republican_int_plot$sample<-factor(republican_int_plot$sample,
                                   levels=c("republicans_full_compliance",
                                            "republicans_half_compliance",
                                            "republicans_bot_follower",
                                            "republicans_itt"),
                                   labels=c("Fully Compliant Respondents",
                                            "Partially Compliant Respondents",
                                            "Minimally Compliant Respondents",
                                            "Respondents Assigned to Treatment"))
democrat_int_plot$sample<-factor(democrat_int_plot$sample,
                                 levels=c("democrats_full_compliance",
                                          "democrats_half_compliance",
                                          "democrats_bot_follower",
                                          "democrats_itt"),
                                 labels=c("Fully Compliant Respondents",
                                          "Partially Compliant Respondents",
                                          "Minimally Compliant Respondents",
                                          "Respondents Assigned to Treatment"))
#create standard error bars
interval1 <- -qnorm((1-0.9)/2) # 90% multiplier
interval2 <- -qnorm((1-0.95)/2) # 95% multiplier
#create plot
library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

figure_2_dems<-ggplot(democrat_int_plot)+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
  geom_point(aes(x=sample, y=estimate),
             position = position_dodge(width = 1/2),
             size=2, colour="blue")+
  geom_linerange(aes(x = sample, ymin = estimate - se*interval1,
                     ymax = estimate + se*interval1),
                 lwd = 1, position = position_dodge(width = 1/2),
                 colour="blue")+
  geom_linerange(aes(x = sample, y = estimate, ymin = estimate - se*interval2,
                     ymax = estimate + se*interval2),
                 lwd = .5, position = position_dodge(width = 1/2),
                 colour="blue")+
  theme(axis.text=element_text(size=9, face="bold",colour="black"),
        plot.title = element_text(face="bold", size=16, hjust = 0.5,vjust=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background=element_blank(),
        axis.title=element_text(size=9, colour="black"),
        legend.position="none",
        legend.key = element_blank(),
        legend.title=element_blank())+
  ylim(c(-1,1))+
  labs(x="",y="")+
  coord_flip()+
  ggtitle(fig_title("Democrats"))

## Warning: Ignoring unknown aesthetics: y

figure_2_reps<-ggplot(republican_int_plot)+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
  geom_point(aes(x=sample, y=estimate),
             position = position_dodge(width = 1/2),
             size=2, colour="red")+
  geom_linerange(aes(x = sample, ymin = estimate - se*interval1,
                     ymax = estimate + se*interval1),
                 lwd = 1, position = position_dodge(width = 1/2),
                 colour="red")+
  geom_linerange(aes(x = sample, y = estimate, ymin = estimate - se*interval2,
                     ymax = estimate + se*interval2),
                 lwd = .5, position = position_dodge(width = 1/2),
                 colour="red")+
  theme(axis.text=element_text(size=9, face="bold",colour="black"),
        plot.title = element_text(face="bold", size=16, hjust = 0.5, vjust=3),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background=element_blank(),
        axis.title=element_text(size=9, colour="black"),
        legend.position="none",
        legend.key = element_blank(),
        legend.title=element_blank())+
  labs(x="",y="")+
  ylim(c(-1,1))+
  coord_flip()+
  ggtitle(fig_title("Republicans"))

## Warning: Ignoring unknown aesthetics: y

figure_2_dems

figure_2_reps

ggsave(figure_2_dems, file="Figure 2 dems.png", width=7, height=4, dpi=1000)
ggsave(figure_2_reps, file="Figure 2 reps.png", width=7, height=4, dpi=1000)

5. AIPW

Augmented Inverse Propensity Weighting is used to estimate the treatment effects for “full compliers.” The Lasso model is used to estimate both propensities and outcomes, and it is assessed over a grid of Lambda values.

AIPW Setup

###############################################################
## AIPW on full compliance
###############################################################

# for each group, restrict sample to those who were treated
dems = c(df$party_id_wave_1 == 1 & df$W == 1)
gop = c(df$party_id_wave_1 == 2 & df$W == 1)


# will use democrats and republicans full compliance, 
# even though we are getting into small n sizes here
full_comp = as.numeric(as.character(df$perfect_complier))
full_comp.dem = full_comp[dems]
full_comp.rep = full_comp[gop]

democrats <- df.x.mod[dems,]
republicans <- df.x.mod[gop,]

AIPW Democrats

#### democrats
# propensities
democrats.propensity = amlinear:::crossfit.cv.glmnet(
  democrats, full_comp.dem,family = "binomial",keep = TRUE,
  weights = df$weights[dems],
  lambda = seq(0.001,0.1,by = 0.001))

theta.hat = amlinear:::crossfit.predict(democrats.propensity)
p_lasso = 1/(1 + exp(-theta.hat))

# inverse propensity score weighting 

G <- ((full_comp.dem - p_lasso) * df$Y[dems]) / 
  (p_lasso * (1 - p_lasso))
tau.hat <- mean(G)
se.hat <- sqrt(var(G) / (length(G) - 1))
c(ATE=tau.hat, lower_ci = tau.hat - 1.96 * se.hat, upper_ci = tau.hat + 1.96 * se.hat)

##        ATE   lower_ci   upper_ci 
## -0.2750413 -0.9068217  0.3567391

# outcomes
Xmod.dem = cbind(full_comp.dem,democrats, 
                       (2 * full_comp.dem - 1) * democrats)
democrats.outcome = amlinear:::crossfit.cv.glmnet(
  Xmod.dem, df$Y[dems], keep = TRUE,
  weights = df$weights[dems],
  penalty.factor = c(0, rep(1, ncol(Xmod.dem) - 1)),
  lambda = seq(0.001,0.1,by = 0.001)) # for simplicity, use same lambda grid

yhat.noncomp.dem = amlinear:::crossfit.predict(democrats.outcome,
                                                 cbind(0, democrats, -democrats))
yhat.comp.dem = amlinear:::crossfit.predict(democrats.outcome,
                                                 cbind(1, democrats, democrats))

# estimate effects
mean(yhat.comp.dem - yhat.noncomp.dem) # estimate ATE

## [1] 0.02783164

G = yhat.comp.dem - yhat.noncomp.dem +  # augmented ipw 
  full_comp.dem / p_lasso * 
  (df$Y[dems] - yhat.comp.dem) -
  (1 - full_comp.dem) / (1 - p_lasso) * 
  (df$Y[dems] - yhat.noncomp.dem)
tau.hat = mean(G)
se.hat = sqrt(var(G) / length(G))
tauhat_lasso_aipw = c(ATE=tau.hat,
                      lower_ci=tau.hat-1.96*se.hat,
                      upper_ci=tau.hat+1.96*se.hat)
tauhat_lasso_aipw

##         ATE    lower_ci    upper_ci 
##  0.05203412 -0.04894128  0.15300951

Lambda Grid Democrats

# examine predictions of ATE from grid of various lambda values
p_lasso_grid <- democrats.propensity$fit.preval

democrats.outcome.comp = amlinear:::crossfit.cv.glmnet(
  Xmod.dem[full_comp.dem == 1,], df$Y[dems][full_comp.dem == 1], keep = TRUE,
  weights = df$weights[dems][full_comp.dem == 1],
  lambda = democrats.propensity$lambda) # for simplicity, use same lambda grid
democrats.outcome.noncomp = amlinear:::crossfit.cv.glmnet(
  Xmod.dem[full_comp.dem == 0,], df$Y[dems][full_comp.dem == 0], keep = TRUE,
  weights = df$weights[dems][full_comp.dem == 0],
  lambda = democrats.propensity$lambda) # for simplicity, use same lambda grid

# cross fitted predictions on control and treated groups
cross_fit_comp = democrats.outcome.comp$fit.preval
cross_fit_noncomp = democrats.outcome.noncomp$fit.preval

# predictions on control group from treated model, and vice versa
democrats_predict_comp = predict.cv.glmnet(democrats.outcome.comp,
                                           newx = Xmod.dem[full_comp.dem == 0,],
                                           s = democrats.propensity$lambda)
democrats_predict_noncomp = predict.cv.glmnet(democrats.outcome.noncomp,
                                           newx = Xmod.dem[full_comp.dem == 1,],
                                           s = democrats.propensity$lambda)
# combine predictions
yhat.comp.dem_grid = rbind(cross_fit_noncomp,democrats_predict_noncomp)
yhat.noncomp.dem_grid = rbind(cross_fit_comp,democrats_predict_comp)

# estimate ATE grid
tauhat_lasso_ols_grid  = apply(yhat.comp.dem_grid - 
                                 yhat.noncomp.dem_grid,2,mean) 

library(ggplot2)
ggplot(mapping = aes(x = democrats.propensity$lambda, y = tauhat_lasso_ols_grid))+
  geom_line() + xlab("Lambda") + ylab("Tau Hat") + ggtitle(fig_title("Lasso ATE by Lambda Values"))

G_grid = yhat.comp.dem_grid - yhat.noncomp.dem_grid +  # augmented ipw 
  full_comp.dem / p_lasso_grid * 
  (df$Y[dems] - yhat.comp.dem_grid) -
  (1 - full_comp.dem) / (1 - p_lasso_grid) * 
  (df$Y[dems] - yhat.noncomp.dem_grid)
tau.hat_grid = apply(G_grid, 2, mean)
se.hat_grid = apply(G_grid, 2, function(x) sqrt(var(x) / length(x)))
tauhat_lasso_aipw_grid = rbind(ATE=tau.hat_grid,
                               lower_ci=tau.hat_grid-1.96*se.hat_grid,
                               upper_ci=tau.hat_grid+1.96*se.hat_grid)
tauhat_lasso_aipw_grid

##                    1           2           3           4           5
## ATE       0.19367713  0.19395150  0.19421819  0.19460987  0.19506864
## lower_ci -0.05105801 -0.05110167 -0.05118444 -0.05111431 -0.05097443
## upper_ci  0.43841228  0.43900468  0.43962082  0.44033405  0.44111170
##                    6           7           8           9          10
## ATE       0.19563072  0.19618703  0.19675711  0.19730368  0.19786752
## lower_ci -0.05075151 -0.05053896 -0.05031869 -0.05011824 -0.04990541
## upper_ci  0.44201295  0.44291303  0.44383290  0.44472560  0.44564045
##                   11          12         13          14          15
## ATE       0.19844284  0.19903767  0.1999154  0.20091367  0.20191583
## lower_ci -0.04968742 -0.04945636 -0.0489934 -0.04843362 -0.04788074
## upper_ci  0.44657311  0.44753170  0.4488242  0.45026095  0.45171239
##                   16          17          18          19          20
## ATE       0.20288151  0.20383829  0.20466211  0.20537598  0.20609732
## lower_ci -0.04736563 -0.04687692 -0.04658316 -0.04638031 -0.04618064
## upper_ci  0.45312864  0.45455351  0.45590737  0.45713228  0.45837528
##                   21          22          23          24          25
## ATE       0.20694895  0.20786119  0.20879005  0.20972200  0.21061607
## lower_ci -0.04597628 -0.04578224 -0.04558689 -0.04539792 -0.04525014
## upper_ci  0.45987418  0.46150461  0.46316699  0.46484191  0.46648228
##                   26          27          28          29          30
## ATE       0.21151832  0.21237840  0.21311325  0.21387620  0.21462946
## lower_ci -0.04508731 -0.04494833 -0.04500909 -0.04509582 -0.04520156
## upper_ci  0.46812395  0.46970514  0.47123560  0.47284821  0.47446048
##                  31          32          33          34          35
## ATE       0.2155296  0.21676673  0.21800648  0.21943238  0.22105809
## lower_ci -0.0452600 -0.04511213 -0.04501159 -0.04482438 -0.04452538
## upper_ci  0.4763193  0.47864558  0.48102456  0.48368914  0.48664155
##                   36          37          38          39          40
## ATE       0.22247762  0.22374665  0.22504710  0.22652852  0.22818528
## lower_ci -0.04435583 -0.04430629 -0.04426453 -0.04401078 -0.04350843
## upper_ci  0.48931106  0.49179959  0.49435873  0.49706782  0.49987900
##                   41          42          43          44          45
## ATE       0.22983766  0.23144337  0.23298855  0.23472614  0.23640501
## lower_ci -0.04304662 -0.04269243 -0.04250429 -0.04221522 -0.04192671
## upper_ci  0.50272195  0.50557917  0.50848140  0.51166750  0.51473673
##                   46          47          48          49          50
## ATE       0.23779015  0.23924243  0.24079499  0.24207248  0.24261448
## lower_ci -0.04184565 -0.04177272 -0.04164623 -0.04183592 -0.04257558
## upper_ci  0.51742595  0.52025757  0.52323621  0.52598088  0.52780453
##                   51          52         53          54          55
## ATE       0.24317938  0.24398148  0.2449908  0.24592678  0.24664199
## lower_ci -0.04345971 -0.04417657 -0.0448155 -0.04590539 -0.04728063
## upper_ci  0.52981847  0.53213954  0.5347971  0.53775894  0.54056461
##                   56          57          58          59          60
## ATE       0.24686495  0.24719824  0.24683544  0.24640924  0.24614443
## lower_ci -0.04914303 -0.05089211 -0.05329506 -0.05578224 -0.05829716
## upper_ci  0.54287293  0.54528859  0.54696594  0.54860071  0.55058601
##                   61          62          63          64          65
## ATE       0.24627024  0.24591154  0.24531380  0.24487014  0.24463672
## lower_ci -0.06033446 -0.06309194 -0.06629761 -0.06944154 -0.07230459
## upper_ci  0.55287494  0.55491502  0.55692522  0.55918182  0.56157804
##                   66          67          68          69          70
## ATE       0.24480455  0.24553550  0.24795189  0.25063342  0.25389480
## lower_ci -0.07539874 -0.07851833 -0.08063727 -0.08312465 -0.08591152
## upper_ci  0.56500784  0.56958934  0.57654105  0.58439149  0.59370112
##                   71          72          73         74         75
## ATE       0.25816412  0.26247360  0.26581254  0.2683005  0.2684164
## lower_ci -0.08860777 -0.09185586 -0.09540904 -0.1004282 -0.1077531
## upper_ci  0.60493602  0.61680306  0.62703412  0.6370291  0.6445858
##                  76         77         78         79         80         81
## ATE       0.2675123  0.2656679  0.2642859  0.2629072  0.2562281  0.2488556
## lower_ci -0.1165072 -0.1258329 -0.1353641 -0.1452951 -0.1586326 -0.1735568
## upper_ci  0.6515318  0.6571688  0.6639359  0.6711094  0.6710887  0.6712680
##                  82         83         84         85         86         87
## ATE       0.2446518  0.2419016  0.2374322  0.2362481  0.2337689  0.2313140
## lower_ci -0.1860716 -0.1963470 -0.2094798 -0.2216419 -0.2378523 -0.2586355
## upper_ci  0.6753752  0.6801503  0.6843441  0.6941380  0.7053901  0.7212635
##                  88         89         90         91         92         93
## ATE       0.2330857  0.2386550  0.2470183  0.2490893  0.2454772  0.2375329
## lower_ci -0.2822273 -0.3085899 -0.3420567 -0.3965019 -0.4885655 -0.6461573
## upper_ci  0.7483987  0.7858999  0.8360934  0.8946805  0.9795200  1.1212231
##                  94         95          96         97        98        99
## ATE       0.2288651  0.1375694  0.05243345  -1.526320 -14.12960 -163.0843
## lower_ci -0.9832778 -1.8052919 -3.55881233 -11.113143 -56.94303 -553.9782
## upper_ci  1.4410081  2.0804306  3.66367922   8.060502  28.68382  227.8096
##                 100
## ATE       -847.0080
## lower_ci -2404.0935
## upper_ci   710.0775

## plot tauhat grid results
ggplot()+
  geom_line(mapping = aes(x = democrats.propensity$lambda, y = tauhat_lasso_aipw_grid[1,]))+ 
  geom_line(mapping = aes(x = democrats.propensity$lambda, y = tauhat_lasso_aipw_grid[2,]),color = "green")+ 
  geom_line(mapping = aes(x = democrats.propensity$lambda, y = tauhat_lasso_aipw_grid[3,]),color = "green")+ 
  xlab("Lambda") + ylab("Tau Hat") + ylim(c(-10,10)) + ggtitle(fig_title("Estimated AIPW Treatment Effect by Lambda"))

## Warning: Removed 3 rows containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_path).

## Warning: Removed 3 rows containing missing values (geom_path).

AIPW Republicans

#### republicans
# propensities
republicans.propensity = amlinear:::crossfit.cv.glmnet(
  republicans, full_comp.rep,family = "binomial",keep = TRUE,
  weights = df$weights[gop],
  lambda = seq(0.001,0.1,by = 0.001))

theta.hat = amlinear:::crossfit.predict(republicans.propensity)
p_lasso = 1/(1 + exp(-theta.hat))

# inverse propensity score weighting 

G <- ((full_comp.rep - p_lasso) * df$Y[gop]) / 
  (p_lasso * (1 - p_lasso))
tau.hat <- mean(G)
se.hat <- sqrt(var(G) / (length(G) - 1))
c(ATE=tau.hat, lower_ci = tau.hat - 1.96 * se.hat, upper_ci = tau.hat + 1.96 * se.hat)

##        ATE   lower_ci   upper_ci 
## -0.5733562 -1.9757520  0.8290395

# outcomes
Xmod.rep = cbind(full_comp.rep,republicans, 
                 (2 * full_comp.rep - 1) * republicans)
republicans.outcome = amlinear:::crossfit.cv.glmnet(
  Xmod.rep, df$Y[gop], keep = TRUE,
  weights = df$weights[gop],
  penalty.factor = c(0, rep(1, ncol(Xmod.rep) - 1)),
  lambda = seq(0.001,0.1,by = 0.001)) # for simplicity, use same lambda grid

yhat.noncomp.rep = amlinear:::crossfit.predict(republicans.outcome,
                                               cbind(0, republicans, -republicans))
yhat.comp.rep = amlinear:::crossfit.predict(republicans.outcome,
                                            cbind(1, republicans, republicans))

# estimate effects
mean(yhat.comp.rep - yhat.noncomp.rep) # estimate ATE

## [1] -0.1405493

G = yhat.comp.rep - yhat.noncomp.rep +  # augmented ipw 
  full_comp.rep / p_lasso * 
  (df$Y[gop] - yhat.comp.rep) -
  (1 - full_comp.rep) / (1 - p_lasso) * 
  (df$Y[gop] - yhat.noncomp.rep)
tau.hat = mean(G)
se.hat = sqrt(var(G) / length(G))
tauhat_lasso_aipw = c(ATE=tau.hat,
                      lower_ci=tau.hat-1.96*se.hat,
                      upper_ci=tau.hat+1.96*se.hat)
tauhat_lasso_aipw

##         ATE    lower_ci    upper_ci 
## -0.18217149 -0.31846921 -0.04587376

Lambda Grid Republicans

# examine predictions of ATE from grid of various lambda values
p_lasso_grid <- republicans.propensity$fit.preval

republicans.outcome.comp = amlinear:::crossfit.cv.glmnet(
  Xmod.rep[full_comp.rep == 1,], df$Y[gop][full_comp.rep == 1], keep = TRUE,
  weights = df$weights[gop][full_comp.rep == 1],
  lambda = republicans.propensity$lambda) # for simplicity, use same lambda grid
republicans.outcome.noncomp = amlinear:::crossfit.cv.glmnet(
  Xmod.rep[full_comp.rep == 0,], df$Y[gop][full_comp.rep == 0], keep = TRUE,
  weights = df$weights[gop][full_comp.rep == 0],
  lambda = republicans.propensity$lambda) # for simplicity, use same lambda grid

# cross fitted predictions on control and treated groups
cross_fit_comp = republicans.outcome.comp$fit.preval
cross_fit_noncomp = republicans.outcome.noncomp$fit.preval

# predictions on control group from treated model, and vice versa
republicans_predict_comp = predict.cv.glmnet(republicans.outcome.comp,
                                           newx = Xmod.rep[full_comp.rep == 0,],
                                           s = republicans.propensity$lambda)
republicans_predict_noncomp = predict.cv.glmnet(republicans.outcome.noncomp,
                                              newx = Xmod.rep[full_comp.rep == 1,],
                                              s = republicans.propensity$lambda)
# combine predictions
yhat.comp.rep_grid = rbind(cross_fit_noncomp,republicans_predict_noncomp)
yhat.noncomp.rep_grid = rbind(cross_fit_comp,republicans_predict_comp)

# estimate ATE grid
tauhat_lasso_ols_grid  = apply(yhat.comp.rep_grid - 
                                 yhat.noncomp.rep_grid,2,mean) 

library(ggplot2)
ggplot(mapping = aes(x = republicans.propensity$lambda, y = tauhat_lasso_ols_grid))+
  geom_line() + xlab("Lambda") + ylab("Tau Hat") + ggtitle("Lasso ATE by Lambda Values")

G_grid = yhat.comp.rep_grid - yhat.noncomp.rep_grid +  # augmented ipw 
  full_comp.rep / p_lasso_grid * 
  (df$Y[gop] - yhat.comp.rep_grid) -
  (1 - full_comp.rep) / (1 - p_lasso_grid) * 
  (df$Y[gop] - yhat.noncomp.rep_grid)
tau.hat_grid = apply(G_grid, 2, mean)
se.hat_grid = apply(G_grid, 2, function(x) sqrt(var(x) / length(x)))
tauhat_lasso_aipw_grid = rbind(ATE=tau.hat_grid,
                               lower_ci=tau.hat_grid-1.96*se.hat_grid,
                               upper_ci=tau.hat_grid+1.96*se.hat_grid)
tauhat_lasso_aipw_grid

##                    1           2           3           4           5
## ATE      -0.33954121 -0.33851687 -0.33773414 -0.33711553 -0.33640625
## lower_ci -0.66077158 -0.65953990 -0.65863234 -0.65797704 -0.65707916
## upper_ci -0.01831083 -0.01749383 -0.01683594 -0.01625402 -0.01573334
##                    6           7           8          9         10
## ATE      -0.33560709 -0.33481155 -0.33403872 -0.3332809 -0.3324549
## lower_ci -0.65607726 -0.65509834 -0.65415588 -0.6532482 -0.6522173
## upper_ci -0.01513692 -0.01452476 -0.01392156 -0.0133135 -0.0126924
##                   11          12          13          14          15
## ATE      -0.33165371 -0.33093481 -0.33021787 -0.32955322 -0.32888446
## lower_ci -0.65117208 -0.65021969 -0.64928929 -0.64842588 -0.64757397
## upper_ci -0.01213534 -0.01164992 -0.01114646 -0.01068056 -0.01019494
##                    16           17           18           19           20
## ATE      -0.328185723 -0.327514700 -0.326856866 -0.326200080 -0.325618306
## lower_ci -0.646707808 -0.645874653 -0.645083091 -0.644312459 -0.643562428
## upper_ci -0.009663638 -0.009154748 -0.008630642 -0.008087702 -0.007674184
##                    21           22           23           24           25
## ATE      -0.325071400 -0.324652201 -0.324560467 -0.324615571 -0.324669943
## lower_ci -0.642876162 -0.642309217 -0.641983299 -0.641844084 -0.641724242
## upper_ci -0.007266638 -0.006995184 -0.007137636 -0.007387058 -0.007615644
##                    26           27           28           29           30
## ATE      -0.324713378 -0.324768886 -0.324959381 -0.325228124 -0.324594299
## lower_ci -0.641619811 -0.641548818 -0.641649515 -0.641848325 -0.640631425
## upper_ci -0.007806944 -0.007988954 -0.008269246 -0.008607924 -0.008557173
##                    31           32           33           34           35
## ATE      -0.323615151 -0.322684949 -0.321764912 -0.320861350 -0.319979934
## lower_ci -0.638880746 -0.637211566 -0.635585525 -0.634033547 -0.632557210
## upper_ci -0.008349556 -0.008158332 -0.007944299 -0.007689154 -0.007402658
##                    36           37           38           39           40
## ATE      -0.319100158 -0.318593969 -0.317887167 -0.317302738 -0.316772571
## lower_ci -0.631143641 -0.630380772 -0.629643612 -0.629135602 -0.628764697
## upper_ci -0.007056675 -0.006807166 -0.006130723 -0.005469874 -0.004780446
##                    41           42           43            44
## ATE      -0.316141944 -0.315433953 -0.314749940 -0.3140916933
## lower_ci -0.628343509 -0.627907661 -0.627546683 -0.6272631401
## upper_ci -0.003940378 -0.002960246 -0.001953196 -0.0009202465
##                     45           46           47           48           49
## ATE      -0.3134493682 -0.312777640 -0.312146839 -0.311529460 -0.310880702
## lower_ci -0.6270755076 -0.626979254 -0.627011303 -0.627221031 -0.627664677
## upper_ci  0.0001767713  0.001423973  0.002717624  0.004162112  0.005903273
##                    50           51          52          53          54
## ATE      -0.310332191 -0.309930952 -0.30974756 -0.30986511 -0.31006456
## lower_ci -0.628334660 -0.629181059 -0.63028686 -0.63181540 -0.63335083
## upper_ci  0.007670277  0.009319154  0.01079174  0.01208518  0.01322172
##                   55          56          57          58          59
## ATE      -0.31078859 -0.31242702 -0.31425791 -0.31622435 -0.31848901
## lower_ci -0.63555080 -0.63915837 -0.64306919 -0.64741912 -0.65229780
## upper_ci  0.01397362  0.01430433  0.01455337  0.01497043  0.01531978
##                   60         61          62          63          64
## ATE      -0.32138685 -0.3248731 -0.32846868 -0.33331483 -0.33838673
## lower_ci -0.65806207 -0.6650527 -0.67222544 -0.68131264 -0.69074373
## upper_ci  0.01528838  0.0153064  0.01528808  0.01468298  0.01397026
##                   65          66          67          68          69
## ATE      -0.34364265 -0.34909870 -0.35390951 -0.35923521 -0.36599022
## lower_ci -0.70104458 -0.71181916 -0.72219540 -0.73347575 -0.74738620
## upper_ci  0.01375929  0.01362176  0.01437638  0.01500532  0.01540577
##                   70          71          72         73          74
## ATE      -0.37295698 -0.38099899 -0.39039816 -0.4005447 -0.41021360
## lower_ci -0.76175908 -0.77785995 -0.79630340 -0.8162773 -0.83599710
## upper_ci  0.01584512  0.01586197  0.01550708  0.0151879  0.01556991
##                   75          76          77         78          79
## ATE      -0.42031606 -0.43018489 -0.43736099 -0.4366269 -0.43204145
## lower_ci -0.85708406 -0.87896349 -0.89822475 -0.9071859 -0.91068350
## upper_ci  0.01645193  0.01859371  0.02350277  0.0339320  0.04660059
##                   80          81          82         83         84
## ATE      -0.42727267 -0.41310736 -0.39515304 -0.3771607 -0.3625413
## lower_ci -0.91503045 -0.90269057 -0.88695326 -0.8738357 -0.8688839
## upper_ci  0.06048512  0.07647585  0.09664718  0.1195143  0.1438012
##                  85         86         87         88         89        90
## ATE      -0.3502964 -0.3358345 -0.3241847 -0.3070115 -0.2859353 -0.268623
## lower_ci -0.8722131 -0.8756114 -0.8862905 -0.8972869 -0.9246608 -0.972235
## upper_ci  0.1716203  0.2039425  0.2379211  0.2832639  0.3527902  0.434989
##                  91         92         93         94         95        96
## ATE      -0.2578307 -0.2590486 -0.2644901 -0.4660162 -0.8627474 -1.508330
## lower_ci -1.0535907 -1.1764228 -1.3583567 -1.6263905 -2.1610131 -3.241440
## upper_ci  0.5379293  0.6583256  0.8293766  0.6943581  0.4355184  0.224781
##                  97         98         99       100
## ATE      -2.5575206 -10.054817  -53.91021  538.4492
## lower_ci -5.4244788 -25.299565 -168.26835 -896.1135
## upper_ci  0.3094375   5.189931   60.44794 1973.0119

## plot tauhat grid results
ggplot()+
  geom_line(mapping = aes(x = republicans.propensity$lambda, y = tauhat_lasso_aipw_grid[1,]))+ 
  geom_line(mapping = aes(x = republicans.propensity$lambda, y = tauhat_lasso_aipw_grid[2,]),color = "green")+ 
  geom_line(mapping = aes(x = republicans.propensity$lambda, y = tauhat_lasso_aipw_grid[3,]),color = "green")+ 
  xlab("Lambda") + ylab("Tau Hat") + ylim(c(-10,10)) + ggtitle(fig_title("Estimated AIPW Treatment Effect by Lambda"))

## Warning: Removed 3 rows containing missing values (geom_path).

## Warning: Removed 3 rows containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_path).

Descriptive statistics

#Creating table of descriptive statistics of covariates
#Subset of numeric variables
nums <- unlist(lapply(final_data, is.numeric))  
summ_stats <- fBasics::basicStats(final_data[ , nums])
summ_stats <- as.data.frame(t(summ_stats))
summ_stats <- summ_stats[c("Mean", "Stdev", "Minimum", "1. Quartile", "Median",  "3. Quartile", "Maximum")]
colnames(summ_stats)[colnames(summ_stats) %in% c('1. Quartile', '3. Quartile')] <- c('Lower quartile', 'Upper quartile')
#Table
options(kableExtra.latex.load_packages = FALSE)
#options(knitr.table.format = "latex")
library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

summ_stats_table <- kable(summ_stats, "html", digits = 2,
                          caption = tab_title("Descriptive Statistics"))
kable_styling(summ_stats_table,
              bootstrap_options=c("striped", "hover", "condensed", "responsive"),
              full_width=FALSE)

Table 3: Descriptive Statistics
	Mean	Stdev	Minimum	Lower quartile	Median	Upper quartile	Maximum
treat	0.60	0.49	0.00	0.00	1.00	1.00	1.00
perfect_complier	0.10	0.30	0.00	0.00	0.00	0.00	1.00
half_complier	0.27	0.44	0.00	0.00	0.00	1.00	1.00
bot_followers	0.36	0.48	0.00	0.00	0.00	1.00	1.00
party_id_wave_1	1.43	0.50	1.00	1.00	1.00	2.00	2.00
strong_partisan_wave_1	0.57	0.50	0.00	0.00	1.00	1.00	1.00
substantive_ideology_scale_wave_5	3.37	1.52	1.00	2.00	3.22	4.60	7.00
substantive_ideology_scale_wave_1	3.39	1.48	1.00	2.10	3.30	4.50	7.00
percent_co_party	0.47	0.28	0.00	0.21	0.63	0.69	0.69
friends_count_wave_1	6.77	0.20	6.73	6.73	6.73	6.73	8.33
birth_year	1965.55	15.39	1933.00	1954.00	1964.00	1974.00	1995.00
family_income	7.30	3.44	1.00	5.00	7.00	10.00	16.00
education	4.56	1.29	1.00	3.00	5.00	6.00	6.00
gender	1.52	0.50	1.00	1.00	2.00	2.00	2.00
ideo_homogeneity_offline	62.79	21.53	0.00	50.00	60.00	80.00	100.00
northeast	0.20	0.40	0.00	0.00	0.00	0.00	1.00
north_central	0.20	0.40	0.00	0.00	0.00	0.00	1.00
south	0.39	0.49	0.00	0.00	0.00	1.00	1.00
west	0.21	0.41	0.00	0.00	0.00	0.00	1.00

6. Causal Tree

The Causal Tree is a decision-tree estimator that can be used to assess heterogeneity in treatment effects. Because the single tree is modeled using random samples of covariates, the results are sensitive to the seed in the R software.

Data Split

##Split the dataset
#We'll start by separating a portion of the dataset as a test set. 
train_fraction <- 0.80  
n <- dim(df)[1]
train_idx <- sample.int(n, replace=F, size=floor(n*train_fraction))
data_train <- df[train_idx,]
data_test <- df[-train_idx,]
#Create further subsets of the data into splitting, estimation, and validation samples
split_size <- floor(nrow(data_train) * 0.5)
split_idx <- sample(nrow(data_train), replace=FALSE, size=split_size)
data_split <- data_train[split_idx,]
data_est <- data_train[-split_idx,]

Fitting the tree, cross-validation, and point estimates

seed <- 1 # start seed at 1, find seed that splits data
num_leaves <- 1 # arbitrary, so that while loop works
while(num_leaves == 1){
##Fit the tree
set.seed(seed)
library(causalTree)
#Defining a formula containing outcome and covariates
fmla_ct <- paste(" Y ~", paste(covariates, collapse = " + "))
#CT
ct_unpruned <- honest.causalTree(
  formula=fmla_ct,
  data=data_split,
  est_data=data_est,
  
  treatment=data_split$W,
  est_treatment=data_est$W,
  
  split.Rule="CT",
  cv.option="TOT", 
  split.Honest=TRUE,
  cv.Honest=TRUE,
  
  minsize=15,
  HonestSampleSize=nrow(data_est)
)

##Cross Validation
ct_cptable <- as.data.frame(ct_unpruned$cptable)
#Optimal complexity parameter to prune tree
selected_cp <- which.min(ct_cptable$xerror)
optim_cp_ct <- ct_cptable[selected_cp, "CP"]
# Prune the tree at optimal complexity parameter.
ct_pruned <- prune(tree=ct_unpruned, cp=optim_cp_ct)

##Point estimates on estimation sample
tauhat_ct_est <- predict(ct_pruned, newdata=data_est)
##Standard errors
#Create a factor column indicating leaf assignment
num_leaves <- length(unique(tauhat_ct_est))
data_est$leaf <- factor(tauhat_ct_est, labels = seq(num_leaves))
#print(num_leaves)
seed <- seed + 1
}

## Loading required package: rpart

## 
## Attaching package: 'rpart'

## The following object is masked from 'package:survival':
## 
##     solder

## Loading required package: rpart.plot

## Loading required package: data.table

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"

Point estimates and standard errors

# Run the regression
ols_ct <- lm(as.formula("Y ~ 0 + leaf + W:leaf"), data=data_est)
#Table of estimates and standard errors by leaf
ols_ct_summary <- summary(ols_ct)
te_summary <- coef(ols_ct_summary)[(num_leaves+1):(2*num_leaves), c("Estimate", "Std. Error")]
print(te_summary)

##            Estimate Std. Error
## leaf1:W -0.45243056  0.4181464
## leaf2:W -0.03359684  0.5006359
## leaf3:W  0.20366558  0.4145783
## leaf4:W  0.27407092  0.3380571
## leaf5:W  0.37760727  0.3687031
## leaf6:W  0.48298128  0.3659228
## leaf7:W  0.52777778  0.3831249
## leaf8:W  0.72357143  0.3998245

##Point estimates on test sample
tauhat_ct_test <- predict(ct_pruned, newdata=data_test)

Plotting tree

#Plot pruned tree
rpart.plot(
  x=ct_pruned,        # Pruned tree
  type=3,            
  fallen=TRUE,       
  leaf.round=1,     
  extra=100,          
  branch=.1,          
  box.palette="RdBu")
title(main = fig_title("Causal Tree"))

Test heterogeneity across leaves

# Null hypothesis: all leaf values are the same
hypothesis <- paste0("leaf1:W = leaf", seq(2, num_leaves), ":W")
ftest <- linearHypothesis(ols_ct, hypothesis, test="F")
#Table
kable_styling(kable(data.frame(ftest, check.names = FALSE, row.names = NULL)[2,],
              "html", digits = 4,
              caption=tab_title("Testing null hypothesis:<br> Average treatment effect is same across leaves"), booktabs = TRUE),
              bootstrap_options=c("striped", "hover", "condensed", "responsive"),
              full_width=FALSE)

Table 4: Testing null hypothesis:
Average treatment effect is same across leaves
	Res.Df	RSS	Df	Sum of Sq	F	Pr(>F)
2	397	740.4164	7	10.2772	0.7872	0.5983

Covariate heterogeneity

# Null hypothesis: the mean is equal across all leaves
hypothesis <- paste0("leaf1 = leaf", seq(2, num_leaves))
means_per_leaf <- matrix(nrow = num_leaves, ncol = num_leaves)
significance <- matrix(nrow = 2, ncol=length(covariates))
# Regress each covariate on leaf assignment to means p
cov_means <- lapply(covariates, function(covariate) {
  lm(paste0(covariate, ' ~ 0 + leaf'), data = data_est)
})
# Extract the mean and standard deviation of each covariate per leaf
cov_table <- lapply(cov_means, function(cov_mean) {
  as.data.frame(t(coef(summary(cov_mean))[,c("Estimate", "Std. Error")]))
})
# Test if means are the same across leaves
cov_ftests <- sapply(cov_means, function(cov_mean) {
  # Sometimes the regression has no residual (SSE = 0), 
  # so we cannot perform an F-test
  tryCatch({
    linearHypothesis(cov_mean, hypothesis)[2, c("F", "Pr(>F)")]
  },
    error = function(cond) {
      message(paste0("Error message during F-test for`", cov_mean$terms[[2]], "`:"))
      message(cond)
      return(c("F" = NA, "Pr(>F)" = NA))
    })
})

Table 5: Average covariate values in each leaf
covariates	leaf1	leaf2	leaf3	leaf4	leaf5	leaf6	leaf7	leaf8
percent_co_party	-0.302	0.638	-1.392	-1.28	0.719	0.703	0.638	0.685
	(0.076)	(0.09)	(0.076)	(0.062)	(0.069)	(0.07)	(0.073)	(0.076)
ideo_homogeneity_offline	-0.05	-0.256	-0.89	0.552	-0.201	0.385	-1.022	1.39
	(0.096)	(0.114)	(0.097)	(0.078)	(0.088)	(0.088)	(0.093)	(0.096)
friends_count_wave_1	-0.067	0.142	-0.182	-0.182	0.108	-0.083	0.034	-0.067
	(0.122)	(0.145)	(0.124)	(0.1)	(0.112)	(0.113)	(0.118)	(0.122)
education	-0.273	-0.115	-0.418	-0.046	0.37	-0.199	-0.039	0.391
	(0.143)	(0.17)	(0.145)	(0.117)	(0.132)	(0.133)	(0.139)	(0.143)
family_income	0.112	-0.285	-0.131	-0.06	0.172	-0.109	0.112	0.124
	(0.143)	(0.17)	(0.145)	(0.117)	(0.132)	(0.133)	(0.139)	(0.143)
birth_year	0.151	0.099	0.285	-0.053	-0.47	-0.013	-0.162	-0.333
	(0.136)	(0.161)	(0.137)	(0.111)	(0.125)	(0.126)	(0.132)	(0.136)
endtime_wave_5	2.204	-0.565	-0.296	-0.25	-0.574	-0.073	0.04	-0.316
	(0.09)	(0.107)	(0.091)	(0.074)	(0.083)	(0.083)	(0.087)	(0.09)
substantive_ideology_scale_wave_1	0.1	0.278	0.246	0.508	-0.529	-0.184	-0.47	-0.501
	(0.131)	(0.156)	(0.133)	(0.107)	(0.12)	(0.122)	(0.127)	(0.131)
gender	1.583	1.412	1.468	1.528	1.544	1.464	1.569	1.562
	(0.072)	(0.086)	(0.073)	(0.059)	(0.066)	(0.067)	(0.07)	(0.072)
northeast	0.25	0.147	0.234	0.236	0.281	0.089	0.255	0.25
	(0.06)	(0.071)	(0.06)	(0.049)	(0.055)	(0.055)	(0.058)	(0.06)
north_central	0.208	0.265	0.17	0.25	0.21	0.214	0.235	0.167
	(0.06)	(0.071)	(0.06)	(0.049)	(0.055)	(0.055)	(0.058)	(0.06)
south	0.375	0.441	0.362	0.417	0.281	0.411	0.314	0.271
	(0.069)	(0.082)	(0.07)	(0.057)	(0.064)	(0.064)	(0.067)	(0.069)
west	0.167	0.147	0.234	0.139	0.228	0.286	0.196	0.312
	(0.059)	(0.07)	(0.06)	(0.048)	(0.054)	(0.055)	(0.057)	(0.059)
party_id_wave_1	1.562	1.324	1.681	1.778	1.158	1.25	1.196	1.146
	(0.062)	(0.073)	(0.062)	(0.05)	(0.057)	(0.057)	(0.06)	(0.062)
political_wave_1	1.396	1.324	1.532	1.361	1.018	1.125	1.137	1.104
	(0.058)	(0.069)	(0.058)	(0.047)	(0.053)	(0.053)	(0.056)	(0.058)
strong_partisan_wave_1	0.562	0.147	0.447	0.528	1	0.679	0.608	0.75
	(0.064)	(0.076)	(0.064)	(0.052)	(0.058)	(0.059)	(0.062)	(0.064)
freq_twitter_wave_1	1.542	1.471	1.447	1.556	1.298	1.268	1.314	1.396
	(0.07)	(0.083)	(0.071)	(0.057)	(0.064)	(0.065)	(0.068)	(0.07)
bot_followers	0.333	0.471	0.34	0.389	0.421	0.286	0.333	0.417
	(0.07)	(0.083)	(0.071)	(0.057)	(0.064)	(0.065)	(0.068)	(0.07)
half_complier	0.188	0.353	0.192	0.208	0.333	0.232	0.196	0.292
	(0.062)	(0.074)	(0.063)	(0.051)	(0.057)	(0.057)	(0.06)	(0.062)
perfect_complier	0.042	0.147	0.192	0.167	0.088	0.107	0.078	0.083
	(0.046)	(0.054)	(0.046)	(0.037)	(0.042)	(0.042)	(0.044)	(0.046)
bin_maker	8.083	10.24	9.66	9.167	1.965	5.089	5.51	4.187
	(0.598)	(0.711)	(0.604)	(0.488)	(0.549)	(0.554)	(0.58)	(0.598)
caseid	703.6	856.5	796.8	770.3	232.3	469.9	503.9	406.5
	(43.51)	(51.7)	(43.97)	(35.53)	(39.93)	(40.28)	(42.21)	(43.51)

Table 6: Covariate variation across leaves
percent_co_party	0.7451
endtime_wave_5	0.6350
ideo_homogeneity_offline	0.5500
caseid	0.3210
bin_maker	0.3118
party_id_wave_1	0.2539
strong_partisan_wave_1	0.1934
substantive_ideology_scale_wave_1	0.1601
political_wave_1	0.1496
education	0.0684
birth_year	0.0585
freq_twitter_wave_1	0.0486
perfect_complier	0.0221
northeast	0.0214
west	0.0211
family_income	0.0199
half_complier	0.0192
friends_count_wave_1	0.0175
south	0.0153
bot_followers	0.0124
gender	0.0110
north_central	0.0058

7.Causal Forest

This section details the Causal Forest, which uses aggregated causal trees to assess treatment heterogeneity. The model is trained on the training data, and covariates are analyzed across trees.

Fitting the forest

# Adding a factor column turns all columns into character
data_train <- as.data.frame(apply(data_train, 2, as.numeric))
data_test <- as.data.frame(apply(data_test, 2, as.numeric))
library(grf)

## 
## Attaching package: 'grf'

## The following object is masked from 'package:amlinear':
## 
##     average_partial_effect

cf <- causal_forest(
  X = as.matrix(data_train[,covariates]),
  Y = data_train$Y,
  W = data_train$W,
  num.trees=500)

Point estimates & standard errors

##Training set (OOB)
oob_pred <- predict(cf, estimate.variance=TRUE)
oob_tauhat_cf <- oob_pred$predictions
oob_tauhat_cf_se <- sqrt(oob_pred$variance.estimates)
##Test set
test_pred <- predict(cf, newdata=as.matrix(data_test[,covariates]), estimate.variance=TRUE)
tauhat_cf_test <- test_pred$predictions
tauhat_cf_test_se <- sqrt(test_pred$variance.estimates)
cf_known_prop <- grf::causal_forest(
  X = as.matrix(data_train[covariates]),
  Y = data_train$Y,
  W = data_train$W,
  W.hat = rep(mean(data_train$W), times=nrow(data_train)))

Table 7: Causal Forest Out of Bag Predictions
predictions	variance.estimates	debiased.error
0.0003	0.0030	0.0101
0.0272	0.0091	0.2438
-0.0049	0.0096	0.0688

Table 8: Causal Forest Test Set Predictions
predictions	variance.estimates
-0.0349	0.0019
-0.0289	0.0068
-0.0365	0.0143

Subgroup heterogeneity

# Manually creating subgroups
num_tiles <- 4  # ntiles = CATE is above / below the median
data_train$cate <- oob_tauhat_cf
data_train$ntile <- factor(ntile(oob_tauhat_cf, n=num_tiles))

ols_sample_ate <- lm("Y ~ ntile + ntile:W", data=data_train)
estimated_sample_ate <- coef(summary(ols_sample_ate))[(num_tiles+1):(2*num_tiles), c("Estimate", "Std. Error")]
hypothesis_sample_ate <- paste0("ntile1:W = ", paste0("ntile", seq(2, num_tiles), ":W"))
ftest_pvalue_sample_ate <- linearHypothesis(ols_sample_ate, hypothesis_sample_ate)[2,"Pr(>F)"]

# Not the most elegent way to make this table, but it works
sample_ate_table <- data.frame(cbind(estimated_sample_ate,c("-",hypothesis_sample_ate),c(ftest_pvalue_sample_ate)))
colnames(sample_ate_table)[3:4] = c("Hypotheses","F-Test")
sample_ate_table[,c(1,2,4)] <- sample_ate_table[,c(1,2,4)] %>% 
  unlist() %>% as.character() %>% as.numeric()

kable_styling(kable(sample_ate_table, "html", digits = 4, booktabs = TRUE,
                    caption = tab_title("Sample ATE")),
              bootstrap_options=c("striped", "hover", "condensed", "responsive"),
              full_width=FALSE)

Table 9: Sample ATE
	Estimate	Std..Error	Hypotheses	F-Test
ntile1:W	0.1482	0.1853		0.9206
ntile2:W	0.0797	0.1884	ntile1:W = ntile2:W	0.9206
ntile3:W	-0.0359	0.1892	ntile1:W = ntile3:W	0.9206
ntile4:W	0.0666	0.1897	ntile1:W = ntile4:W	0.9206

Covariate heterogeneity

# Regress each covariate on ntile assignment to means p
cov_means <- lapply(covariates, function(covariate) {
  lm(paste0(covariate, ' ~ 0 + ntile'), data = data_train)
})
# Extract the mean and standard deviation of each covariate per ntile
cov_table <- lapply(cov_means, function(cov_mean) {
  as.data.frame(t(coef(summary(cov_mean))[,c("Estimate", "Std. Error")]))
})

Table 10: Average covariate values in each n-tile
covariates	ntile1	ntile2	ntile3	ntile4
percent_co_party	0.304	0.029	-0.278	0.08
	(0.068)	(0.068)	(0.068)	(0.068)
ideo_homogeneity_offline	0.198	0.003	-0.123	-0.031
	(0.069)	(0.069)	(0.069)	(0.069)
friends_count_wave_1	0.111	-0.102	0.005	0.018
	(0.071)	(0.071)	(0.071)	(0.071)
education	0.087	-0.106	-0.046	0.143
	(0.07)	(0.07)	(0.07)	(0.07)
family_income	-0.122	-0.133	0.033	0.221
	(0.07)	(0.07)	(0.07)	(0.07)
birth_year	0.009	-0.08	0.055	-0.27
	(0.068)	(0.068)	(0.068)	(0.068)
endtime_wave_5	0.325	0.045	-0.148	-0.323
	(0.067)	(0.068)	(0.068)	(0.068)
substantive_ideology_scale_wave_1	-0.684	-0.366	0.116	0.691
	(0.061)	(0.061)	(0.061)	(0.061)
gender	1.643	1.553	1.51	1.369
	(0.034)	(0.034)	(0.034)	(0.034)
northeast	0.188	0.218	0.189	0.18
	(0.028)	(0.028)	(0.028)	(0.028)
north_central	0.174	0.238	0.209	0.238
	(0.029)	(0.029)	(0.029)	(0.029)
south	0.435	0.384	0.345	0.369
	(0.034)	(0.034)	(0.034)	(0.034)
west	0.217	0.175	0.262	0.214
	(0.029)	(0.029)	(0.029)	(0.029)
party_id_wave_1	1.184	1.301	1.476	1.684
	(0.032)	(0.032)	(0.032)	(0.032)
political_wave_1	1.184	1.272	1.34	1.199
	(0.03)	(0.03)	(0.03)	(0.03)
strong_partisan_wave_1	0.73	0.558	0.49	0.607
	(0.034)	(0.034)	(0.034)	(0.034)
freq_twitter_wave_1	1.391	1.432	1.437	1.442
	(0.034)	(0.034)	(0.034)	(0.034)
bot_followers	0.377	0.359	0.408	0.301
	(0.033)	(0.033)	(0.033)	(0.033)
half_complier	0.242	0.267	0.291	0.218
	(0.03)	(0.03)	(0.03)	(0.03)
perfect_complier	0.121	0.102	0.116	0.087
	(0.021)	(0.022)	(0.022)	(0.022)
bin_maker	4.657	6.714	8.097	7.723
	(0.332)	(0.332)	(0.332)	(0.332)
caseid	429.4	585.4	692.8	666.3
	(24.29)	(24.35)	(24.35)	(24.35)

covariate_means_per_ntile <- aggregate(. ~ ntile, data_train, mean)[,covariates]
covariate_means <- apply(data_train, 2, mean)[covariates]
ntile_weights <- table(data_train$ntile) / dim(data_train)[1] 
deviations <- t(apply(covariate_means_per_ntile, 1, function(x) x - covariate_means))
covariate_means_weighted_var <- apply(deviations, 2, function(x) sum(ntile_weights * x^2))
covariate_var <- apply(data_train, 2, var)[covariates]
cov_variation <- covariate_means_weighted_var / covariate_var

Table 11: Covariate variation across n-tiles
substantive_ideology_scale_wave_1	0.2614
party_id_wave_1	0.1474
caseid	0.0798
bin_maker	0.0730
endtime_wave_5	0.0580
percent_co_party	0.0431
gender	0.0391
strong_partisan_wave_1	0.0317
political_wave_1	0.0208
family_income	0.0201
birth_year	0.0159
ideo_homogeneity_offline	0.0138
education	0.0099
bot_followers	0.0065
west	0.0056
friends_count_wave_1	0.0055
south	0.0046
north_central	0.0041
half_complier	0.0039
perfect_complier	0.0018
freq_twitter_wave_1	0.0016
northeast	0.0014

tc <- test_calibration(cf)

Table 12: Best linear fit using forest predictions (on held-out data) as well as the mean forest prediction as regressors, along with heteroskedasticity-robust (HC3) SEs.
Estimate	Std. Error	t value	Pr(>\|t\|)
-0.712611	2.650675	-0.268841	0.788119
-7.282025	1.226377	-5.937838	0.000000

8.Learners

This section shows the creation of the various learner estimates, as well as comparisons and combinations of these estimates.

S-Learner

df <- as.data.frame(apply(df, 2, as.numeric))
data_train <- as.data.frame(apply(data_train, 2, as.numeric))
data_test <- as.data.frame(apply(data_test, 2, as.numeric))
X = as.matrix(data_train[,covariates])
W = data_train$W
Y = data_train$Y
sf = regression_forest(cbind(X, W), Y)
pred.sf.0 = predict(sf, cbind(X, 0))$predictions
pred.sf.1 = predict(sf, cbind(X, 1))$predictions
preds.sf.oob = predict(sf)$predictions
pred.sf.0[W==0] = preds.sf.oob[W==0]
pred.sf.1[W==1] = preds.sf.oob[W==1]
preds.sf = pred.sf.1 - pred.sf.0
# estimate test data
pred.sf.0_test = predict(sf, cbind(data_test, 0))$predictions
pred.sf.1_test = predict(sf, cbind(data_test, 1))$predictions
tauhat_s1_test = pred.sf.1_test - pred.sf.0_test

T-Learner

tf0 = regression_forest(X[W==0,], Y[W==0])
tf1 = regression_forest(X[W==1,], Y[W==1])
tf.preds.0 = predict(tf0, X)$predictions
tf.preds.1 = predict(tf1, X)$predictions
tf.preds.0[W==0] = predict(tf0)$predictions #OOB
tf.preds.1[W==1] = predict(tf1)$predictions #OOB
preds.tf = tf.preds.1 - tf.preds.0
# estimate test data
tf.preds.0_test = predict(tf0,data_test)$predictions
tf.preds.1_test = predict(tf1,data_test)$predictions
tauhat_t1_test = tf.preds.1_test - tf.preds.0_test

X-Learner

tf0 = regression_forest(X[W==0,], Y[W==0])
yhat0 = predict(tf0, X[W==1,])$predictions
xf1 = regression_forest(X[W==1,], Y[W==1]-yhat0)
xf.preds.1 = predict(xf1, X)$predictions
# this line ensures we make OOB predictions when appropriate
xf.preds.1[W==1] = predict(xf1)$predictions
tf1 = regression_forest(X[W==1,], Y[W==1])
yhat1 = predict(tf1, X[W==0,])$predictions
xf0 = regression_forest(X[W==0,], yhat1-Y[W==0])
xf.preds.0 = predict(xf0, X)$predictions
# this line ensures we make OOB predictions when appropriate
xf.preds.0[W==0] = predict(xf0)$predictions
propf = regression_forest(X, W, tune.parameters = TRUE)
ehat = predict(propf)$predictions
preds.xf = (1 - ehat) * xf.preds.1 + ehat * xf.preds.0
# estimate in test data
preds_x1_test.1 = predict(xf1, data_test)$predictions 
preds_x1_test.0 = predict(xf0, data_test)$predictions
ehat_test = predict(propf, data_test)$predictions
tauhat_x1_test = (1 - ehat_test) * preds_x1_test.1 + ehat_test * preds_x1_test.0

Compare MSE

#Comparing predictions
# Compute Y-star
p <- mean(data_test$W)
Y_star <- ((data_test$W - p)/(p*(1-p)))*data_test$Y
# Compute the sample average treatment effect to use as a baseline comparison
tauhat_sample_ate <- with(data_train, mean(Y[W==1]) - mean(Y[W==0]))
#Compute test MSE for all methods 
mse <- data.frame(
  Sample_ATE_Loss = (Y_star - tauhat_sample_ate)^2,
  S_Learner_Loss = (Y_star - tauhat_s1_test)^2,
  T_Learner_Loss = (Y_star - tauhat_t1_test)^2,
  X_Learner_Loss = (Y_star - tauhat_x1_test)^2,
  Causal_Tree_Loss = (Y_star - tauhat_ct_test)^2,
  Causal_Forest_Loss = (Y_star - tauhat_cf_test)^2)

mse_summary1 <- describe(mse)[, c('mean', 'se')]

kable_styling(kable(mse_summary1, "html", digits = 4, row.names=TRUE,
                    caption=tab_title("MSE Across Estimators"), booktabs = TRUE),
              bootstrap_options=c("striped", "hover", "condensed", "responsive"),
              full_width=FALSE)

Table 13: MSE Across Estimators
	mean	se
Sample_ATE_Loss	64.9801	4.9635
S_Learner_Loss	64.9465	4.8664
T_Learner_Loss	64.8882	4.9159
X_Learner_Loss	65.1182	4.9221
Causal_Tree_Loss	65.6354	5.1290
Causal_Forest_Loss	64.9631	4.8694

Compare Mean Estimated Treatment Effects

ates <- data.frame(
  Sample_ATE = rep(tauhat_sample_ate, nrow(data_train)),
  S_Learner_ATE = preds.sf,
  T_Learner_ATE = preds.tf,
  X_Learner_ATE = preds.xf,
  Causal_Forest_ATE= oob_tauhat_cf
)
  
  ate_sum <- describe(ates)[, c('mean', 'se')]
 
  kable_styling(kable(ate_sum,  "html", digits = 4, row.names=TRUE,
                    caption=tab_title("ATE Across Estimators"), booktabs = TRUE),
              bootstrap_options=c("striped", "hover", "condensed", "responsive"),
              full_width=FALSE)

Table 14: ATE Across Estimators
	mean	se
Sample_ATE	0.1244	0.0000
S_Learner_ATE	0.0003	0.0016
T_Learner_ATE	0.0107	0.0044
X_Learner_ATE	0.0051	0.0017
Causal_Forest_ATE	-0.0152	0.0011

R Loss Function

Y.forest.test = regression_forest(X = as.matrix(data_test[covariates]), Y = data_test$Y)
Y.hat.test = predict(Y.forest.test)$predictions
W.forest.test = regression_forest(X = as.matrix(data_test[covariates]), Y = data_test$W)
W.hat.test = predict(W.forest.test)$predictions

mse_rloss <- data.frame(
  Sample_ATE_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_sample_ate)^2,
  S_Learner_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_s1_test)^2,
  T_Learner_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_t1_test)^2,
  X_Learner_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_x1_test)^2,
  Causal_Tree_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_ct_test)^2,
  Causal_Forest_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_cf_test)^2)

mse_rloss_summary <- describe(mse_rloss)[, c('mean', 'se')]

kable_styling(kable(mse_rloss_summary,  "html", digits = 4, row.names=TRUE,
                    caption=tab_title("MSE R Loss Across Estimators"), booktabs = TRUE),
              bootstrap_options=c("striped", "hover", "condensed", "responsive"),
              full_width=FALSE)

Table 15: MSE R Loss Across Estimators
	mean	se
Sample_ATE_Loss	0.2000	0.0204
S_Learner_Loss	0.2026	0.0204
T_Learner_Loss	0.2031	0.0204
X_Learner_Loss	0.2013	0.0204
Causal_Tree_Loss	0.2102	0.0222
Causal_Forest_Loss	0.2026	0.0204

Stacking

library(nnls)
## Stacking test
prob = 0.6
NREP = 50
RESP = data_train$Y - cf$Y.hat
R.mat = cbind(1, data_train$W - prob,
                  (data_train$W - prob) * oob_tauhat_cf,
                  (data_train$W - prob) * preds.xf)
    
stack = nnls(R.mat, RESP)

    
print("coefs")

## [1] "coefs"

print(stack)

## Nonnegative least squares model
## x estimates: 0 0.006282432 0 0 
## residual sum-of-squares: 158.6
## reason terminated: The solution has been computed sucessfully.

stack.results = data.frame(stack$x)

rownames(stack.results) = c("Intercept","W-p","Causal Forest","X Learner")

kable_styling(kable(stack.results,  "html", digits = 4, row.names=TRUE,
                    caption=tab_title("Stacking Coefficients"), booktabs = TRUE),
              bootstrap_options=c("striped", "hover", "condensed", "responsive"),
              full_width=FALSE)

Table 16: Stacking Coefficients
	stack.x
Intercept	0.0000
W-p	0.0063
Causal Forest	0.0000
X Learner	0.0000

Econ 293 Final: Investigating Heterogeneity in a Twitter Experiment

Tyler McDaniel & Nima Dahir

6/7/2019

1. Replication

Compliance

Outcome and Control Variables

Missing Data and Imputed Values

2. Heterogenous treatment effects

Creating Interactions

3. ITT Effects

ITT set up

ITT Models

4. CACE

CACE set up

Lasso Democrats

Lasso Republicans

Two Stage Least Squares

CACE Results

CACE Plot

5. AIPW

AIPW Setup

AIPW Democrats

Lambda Grid Democrats

AIPW Republicans

Lambda Grid Republicans

Descriptive statistics

6. Causal Tree

Data Split

Fitting the tree, cross-validation, and point estimates

Point estimates and standard errors

Plotting tree

Test heterogeneity across leaves

Covariate heterogeneity

7.Causal Forest

Fitting the forest

Point estimates & standard errors

Subgroup heterogeneity

Covariate heterogeneity

8.Learners

S-Learner

T-Learner

X-Learner

Compare MSE

Compare Mean Estimated Treatment Effects

R Loss Function

Stacking