1. Replication
The following code replicates the set-up of the Bail et al. (2018) study. Our results are nearly identical, although not entirely. This could be due to the fact that many covariates are not included in the replication materials.
Compliance
#Compliance measure
twitter_data$complier_scale<-0
twitter_data$complier_scale <-
rowSums(twitter_data[,c("substantive_question_correct_wave_2",
"substantive_question_correct_wave_3",
"substantive_question_correct_wave_4",
"animal_correct_wave_2",
"animal_correct_wave_3",
"animal_correct_wave_4")], na.rm=TRUE)
#partial complier dummy
twitter_data$half_complier<-0
twitter_data$half_complier[twitter_data$complier_scale>0&
twitter_data$complier_scale<6]<-1
#construct full complier dummy
twitter_data$perfect_complier<-0
twitter_data$perfect_complier[twitter_data$complier_scale==6]<-1
partial_compliance_rate<-nrow(twitter_data[(twitter_data$half_complier==1),])/
nrow(twitter_data[(twitter_data$half_complier==0),])
full_compliance_rate<-nrow(twitter_data[(twitter_data$perfect_complier==1),])/
nrow(twitter_data[(twitter_data$perfect_complier==0),])
Outcome and Control Variables
#invert questions that prime liberal values
twitter_data$government_should_regulate_businesses_wave_1<-
8-twitter_data$government_should_regulate_businesses_wave_1
twitter_data$racial_discrimination_hurts_black_people_wave_1<-
8-twitter_data$racial_discrimination_hurts_black_people_wave_1
twitter_data$immigrants_strengthen_country_wave_1<-
8-twitter_data$immigrants_strengthen_country_wave_1
twitter_data$corporations_make_too_much_profit_wave_1<-
8-twitter_data$corporations_make_too_much_profit_wave_1
twitter_data$homosexuality_should_be_accepted_wave_1<-
8-twitter_data$homosexuality_should_be_accepted_wave_1
twitter_data$government_should_regulate_businesses_wave_5<-
8-twitter_data$government_should_regulate_businesses_wave_5
twitter_data$racial_discrimination_hurts_black_people_wave_5<-
8-twitter_data$racial_discrimination_hurts_black_people_wave_5
twitter_data$immigrants_strengthen_country_wave_5<-
8-twitter_data$immigrants_strengthen_country_wave_5
twitter_data$corporations_make_too_much_profit_wave_5<-
8-twitter_data$corporations_make_too_much_profit_wave_5
twitter_data$homosexuality_should_be_accepted_wave_5<-
8-twitter_data$homosexuality_should_be_accepted_wave_5
#calculate chronbach's alpha
alpha_calc<-twitter_data[,c(
"government_should_regulate_businesses_wave_1",
"racial_discrimination_hurts_black_people_wave_1",
"immigrants_strengthen_country_wave_1",
"corporations_make_too_much_profit_wave_1",
"homosexuality_should_be_accepted_wave_1",
"government_wasteful_inefficient_wave_1",
"poor_people_have_it_easy_wave_1",
"government_cannot_afford_to_help_needy_wave_1",
"best_way_peace_military_strength_wave_1",
"stricter_environmental_laws_damaging_wave_1")]
library(psych)
psych::alpha(alpha_calc)
##
## Reliability analysis
## Call: psych::alpha(x = alpha_calc)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.91 0.91 0.91 0.51 10 0.0036 3.4 1.5 0.51
##
## lower alpha upper 95% confidence boundaries
## 0.91 0.91 0.92
##
## Reliability if an item is dropped:
## raw_alpha std.alpha
## government_should_regulate_businesses_wave_1 0.91 0.90
## racial_discrimination_hurts_black_people_wave_1 0.90 0.90
## immigrants_strengthen_country_wave_1 0.91 0.90
## corporations_make_too_much_profit_wave_1 0.91 0.91
## homosexuality_should_be_accepted_wave_1 0.91 0.91
## government_wasteful_inefficient_wave_1 0.91 0.91
## poor_people_have_it_easy_wave_1 0.90 0.90
## government_cannot_afford_to_help_needy_wave_1 0.90 0.90
## best_way_peace_military_strength_wave_1 0.90 0.90
## stricter_environmental_laws_damaging_wave_1 0.89 0.89
## G6(smc) average_r S/N
## government_should_regulate_businesses_wave_1 0.90 0.51 9.5
## racial_discrimination_hurts_black_people_wave_1 0.90 0.50 9.0
## immigrants_strengthen_country_wave_1 0.90 0.51 9.5
## corporations_make_too_much_profit_wave_1 0.91 0.53 10.0
## homosexuality_should_be_accepted_wave_1 0.91 0.52 9.9
## government_wasteful_inefficient_wave_1 0.91 0.53 10.1
## poor_people_have_it_easy_wave_1 0.90 0.50 9.1
## government_cannot_afford_to_help_needy_wave_1 0.90 0.51 9.3
## best_way_peace_military_strength_wave_1 0.90 0.51 9.4
## stricter_environmental_laws_damaging_wave_1 0.89 0.49 8.5
## alpha se var.r med.r
## government_should_regulate_businesses_wave_1 0.0040 0.0090 0.52
## racial_discrimination_hurts_black_people_wave_1 0.0042 0.0086 0.49
## immigrants_strengthen_country_wave_1 0.0040 0.0082 0.51
## corporations_make_too_much_profit_wave_1 0.0038 0.0069 0.52
## homosexuality_should_be_accepted_wave_1 0.0038 0.0079 0.53
## government_wasteful_inefficient_wave_1 0.0038 0.0066 0.53
## poor_people_have_it_easy_wave_1 0.0042 0.0074 0.51
## government_cannot_afford_to_help_needy_wave_1 0.0041 0.0079 0.51
## best_way_peace_military_strength_wave_1 0.0040 0.0083 0.51
## stricter_environmental_laws_damaging_wave_1 0.0044 0.0057 0.49
##
## Item statistics
## n raw.r std.r r.cor
## government_should_regulate_businesses_wave_1 1235 0.73 0.73 0.70
## racial_discrimination_hurts_black_people_wave_1 1238 0.80 0.80 0.78
## immigrants_strengthen_country_wave_1 1234 0.74 0.74 0.70
## corporations_make_too_much_profit_wave_1 1227 0.67 0.68 0.63
## homosexuality_should_be_accepted_wave_1 1230 0.68 0.68 0.63
## government_wasteful_inefficient_wave_1 1234 0.66 0.66 0.61
## poor_people_have_it_easy_wave_1 1231 0.80 0.79 0.77
## government_cannot_afford_to_help_needy_wave_1 1230 0.77 0.77 0.74
## best_way_peace_military_strength_wave_1 1230 0.76 0.76 0.72
## stricter_environmental_laws_damaging_wave_1 1228 0.87 0.87 0.87
## r.drop mean sd
## government_should_regulate_businesses_wave_1 0.66 3.0 1.7
## racial_discrimination_hurts_black_people_wave_1 0.74 4.0 2.1
## immigrants_strengthen_country_wave_1 0.67 2.9 1.8
## corporations_make_too_much_profit_wave_1 0.59 3.3 2.0
## homosexuality_should_be_accepted_wave_1 0.60 2.6 2.0
## government_wasteful_inefficient_wave_1 0.58 4.4 1.9
## poor_people_have_it_easy_wave_1 0.74 3.2 2.1
## government_cannot_afford_to_help_needy_wave_1 0.71 3.1 2.0
## best_way_peace_military_strength_wave_1 0.69 4.1 2.0
## stricter_environmental_laws_damaging_wave_1 0.83 3.3 2.1
##
## Non missing response frequency for each item
## 1 2 3 4 5
## government_should_regulate_businesses_wave_1 0.25 0.20 0.22 0.10 0.10
## racial_discrimination_hurts_black_people_wave_1 0.13 0.19 0.20 0.07 0.07
## immigrants_strengthen_country_wave_1 0.28 0.25 0.14 0.14 0.06
## corporations_make_too_much_profit_wave_1 0.23 0.18 0.16 0.16 0.07
## homosexuality_should_be_accepted_wave_1 0.46 0.17 0.07 0.13 0.03
## government_wasteful_inefficient_wave_1 0.07 0.16 0.12 0.11 0.22
## poor_people_have_it_easy_wave_1 0.32 0.17 0.10 0.07 0.13
## government_cannot_afford_to_help_needy_wave_1 0.31 0.19 0.11 0.10 0.11
## best_way_peace_military_strength_wave_1 0.11 0.17 0.14 0.15 0.16
## stricter_environmental_laws_damaging_wave_1 0.30 0.16 0.10 0.11 0.11
## 6 7 miss
## government_should_regulate_businesses_wave_1 0.08 0.04 0.00
## racial_discrimination_hurts_black_people_wave_1 0.13 0.20 0.00
## immigrants_strengthen_country_wave_1 0.07 0.06 0.00
## corporations_make_too_much_profit_wave_1 0.10 0.09 0.01
## homosexuality_should_be_accepted_wave_1 0.06 0.08 0.01
## government_wasteful_inefficient_wave_1 0.16 0.17 0.00
## poor_people_have_it_easy_wave_1 0.11 0.10 0.01
## government_cannot_afford_to_help_needy_wave_1 0.11 0.07 0.01
## best_way_peace_military_strength_wave_1 0.13 0.15 0.01
## stricter_environmental_laws_damaging_wave_1 0.12 0.10 0.01
#create average score by wave
twitter_data$substantive_ideology_scale_wave_1<-rowMeans(twitter_data[,c(
"government_should_regulate_businesses_wave_1",
"racial_discrimination_hurts_black_people_wave_1",
"immigrants_strengthen_country_wave_1",
"corporations_make_too_much_profit_wave_1",
"homosexuality_should_be_accepted_wave_1",
"government_wasteful_inefficient_wave_1",
"poor_people_have_it_easy_wave_1",
"government_cannot_afford_to_help_needy_wave_1",
"best_way_peace_military_strength_wave_1",
"stricter_environmental_laws_damaging_wave_1")], na.rm=TRUE)
twitter_data$substantive_ideology_scale_wave_5<-rowMeans(twitter_data[,c(
"government_should_regulate_businesses_wave_5",
"racial_discrimination_hurts_black_people_wave_5",
"immigrants_strengthen_country_wave_5",
"corporations_make_too_much_profit_wave_5",
"homosexuality_should_be_accepted_wave_5",
"government_wasteful_inefficient_wave_5",
"poor_people_have_it_easy_wave_5",
"government_cannot_afford_to_help_needy_wave_5",
"best_way_peace_military_strength_wave_5",
"stricter_environmental_laws_damaging_wave_5")], na.rm=TRUE)
#Control Variables
control_variables<-twitter_data[,c(
"percent_co_party",
"political_wave_1",
"freq_twitter_wave_1",
"friends_count_wave_1",
"birth_year",
"family_income",
"education",
"gender",
"ideo_homogeneity_offline",
"northeast",
"north_central",
"south",
"west",
"caseid",
"bin_maker")]
Missing Data and Imputed Values
#examine missing data in first wave
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2019 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
missmap(control_variables,main = fig_title("Missingness Map"))

#impute missing data from first wave
forimputation<-cbind(twitter_data$substantive_ideology_scale_wave_1, control_variables)
colnames(forimputation)[colnames(forimputation)==
"twitter_data$substantive_ideology_scale_wave_1"]<-
"substantive_ideology_scale_wave_1"
library(mice)
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
#prepare variables for imputation
forimputation$caseid<-as.character(forimputation$caseid)
#take log of variables with heavy skew
forimputation$percent_co_party<-log(forimputation$percent_co_party+1)
forimputation$friends_count_wave_1<-log(forimputation$friends_count_wave_1+1)
#impute
imputed_data <- mice(forimputation,m=15,seed=352,
exclude=c("caseid","bin_maker"))
##
## iter imp variable
## 1 1 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1* birth_year family_income education gender ideo_homogeneity_offline* northeast* north_central* south
## 1 2 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1 birth_year* family_income* education* gender* ideo_homogeneity_offline* northeast* north_central south*
## 1 3 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year* family_income* education* gender ideo_homogeneity_offline northeast north_central south*
## 1 4 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1* birth_year family_income* education gender* ideo_homogeneity_offline northeast north_central* south*
## 1 5 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1 birth_year* family_income education* gender ideo_homogeneity_offline northeast* north_central* south
## 1 6 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year* family_income education gender ideo_homogeneity_offline northeast* north_central* south
## 1 7 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year family_income* education gender* ideo_homogeneity_offline northeast north_central south
## 1 8 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year* family_income education* gender* ideo_homogeneity_offline* northeast* north_central south
## 1 9 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1 birth_year family_income* education gender ideo_homogeneity_offline* northeast north_central south*
## 1 10 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1 birth_year* family_income education gender ideo_homogeneity_offline* northeast* north_central* south
## 1 11 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year family_income education* gender ideo_homogeneity_offline northeast* north_central south
## 1 12 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1* birth_year family_income education* gender ideo_homogeneity_offline northeast north_central south
## 1 13 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year* family_income* education gender ideo_homogeneity_offline* northeast* north_central south
## 1 14 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1* birth_year family_income education* gender ideo_homogeneity_offline northeast north_central south
## 1 15 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1 birth_year family_income education gender ideo_homogeneity_offline* northeast* north_central* south
## 2 1 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1* birth_year* family_income* education gender ideo_homogeneity_offline northeast* north_central* south
## 2 2 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year family_income* education gender ideo_homogeneity_offline* northeast north_central south
## 2 3 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1* birth_year* family_income* education* gender* ideo_homogeneity_offline* northeast* north_central* south
## 2 4 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year* family_income* education* gender* ideo_homogeneity_offline northeast north_central south
## 2 5 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year* family_income* education* gender* ideo_homogeneity_offline northeast north_central* south
## 2 6 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1* birth_year family_income education gender* ideo_homogeneity_offline northeast* north_central* south*
## 2 7 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year* family_income* education gender ideo_homogeneity_offline* northeast north_central* south*
## 2 8 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1* birth_year family_income education* gender* ideo_homogeneity_offline northeast* north_central south
## 2 9 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year family_income education* gender* ideo_homogeneity_offline northeast* north_central south*
## 2 10 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1* birth_year* family_income* education* gender* ideo_homogeneity_offline northeast* north_central south
## 2 11 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year* family_income education* gender ideo_homogeneity_offline northeast north_central* south
## 2 12 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1 birth_year family_income education* gender ideo_homogeneity_offline* northeast* north_central south*
## 2 13 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year family_income education gender ideo_homogeneity_offline northeast north_central south
## 2 14 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year* family_income* education* gender* ideo_homogeneity_offline* northeast north_central south*
## 2 15 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1* birth_year family_income education* gender* ideo_homogeneity_offline* northeast north_central south
## 3 1 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year* family_income education gender ideo_homogeneity_offline* northeast* north_central south
## 3 2 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1* birth_year* family_income education gender ideo_homogeneity_offline* northeast* north_central south
## 3 3 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year family_income* education gender ideo_homogeneity_offline northeast* north_central* south*
## 3 4 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year* family_income* education* gender ideo_homogeneity_offline* northeast* north_central south
## 3 5 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year* family_income education gender* ideo_homogeneity_offline* northeast* north_central* south
## 3 6 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year* family_income education gender* ideo_homogeneity_offline northeast north_central* south*
## 3 7 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1* birth_year* family_income* education* gender* ideo_homogeneity_offline northeast* north_central* south
## 3 8 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year* family_income education gender ideo_homogeneity_offline* northeast* north_central south*
## 3 9 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1* birth_year* family_income* education* gender ideo_homogeneity_offline* northeast north_central south
## 3 10 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1 birth_year family_income* education gender ideo_homogeneity_offline* northeast* north_central south*
## 3 11 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year* family_income* education gender* ideo_homogeneity_offline northeast* north_central south
## 3 12 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1* birth_year* family_income* education* gender* ideo_homogeneity_offline* northeast* north_central south
## 3 13 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1* birth_year family_income* education gender* ideo_homogeneity_offline northeast north_central south*
## 3 14 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year family_income education* gender ideo_homogeneity_offline* northeast* north_central south
## 3 15 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year family_income education* gender* ideo_homogeneity_offline northeast* north_central south
## 4 1 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year* family_income education gender ideo_homogeneity_offline* northeast* north_central south
## 4 2 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year* family_income* education* gender ideo_homogeneity_offline* northeast north_central south
## 4 3 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1 birth_year* family_income education gender* ideo_homogeneity_offline* northeast north_central south
## 4 4 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1 birth_year* family_income* education* gender* ideo_homogeneity_offline northeast* north_central south
## 4 5 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1* birth_year* family_income education gender* ideo_homogeneity_offline northeast* north_central* south
## 4 6 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1 birth_year* family_income education* gender* ideo_homogeneity_offline northeast north_central south
## 4 7 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1* birth_year* family_income education* gender ideo_homogeneity_offline* northeast* north_central south
## 4 8 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year family_income education gender ideo_homogeneity_offline northeast* north_central south
## 4 9 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year family_income* education gender ideo_homogeneity_offline* northeast north_central* south*
## 4 10 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1* birth_year* family_income education* gender ideo_homogeneity_offline* northeast north_central south
## 4 11 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year family_income education gender* ideo_homogeneity_offline northeast* north_central south*
## 4 12 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1 birth_year* family_income* education* gender ideo_homogeneity_offline northeast north_central south
## 4 13 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1* birth_year family_income* education gender ideo_homogeneity_offline* northeast north_central south
## 4 14 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1 birth_year* family_income* education gender* ideo_homogeneity_offline northeast north_central* south
## 4 15 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year family_income* education* gender* ideo_homogeneity_offline northeast* north_central south
## 5 1 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year* family_income education gender* ideo_homogeneity_offline* northeast north_central south*
## 5 2 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1 birth_year* family_income education gender* ideo_homogeneity_offline* northeast* north_central south
## 5 3 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1 birth_year* family_income education gender ideo_homogeneity_offline northeast* north_central* south
## 5 4 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year family_income* education* gender ideo_homogeneity_offline northeast* north_central south
## 5 5 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1 birth_year* family_income education* gender* ideo_homogeneity_offline northeast north_central* south
## 5 6 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year* family_income education* gender* ideo_homogeneity_offline northeast north_central* south
## 5 7 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1 birth_year family_income* education gender* ideo_homogeneity_offline* northeast* north_central* south*
## 5 8 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1 birth_year family_income* education* gender* ideo_homogeneity_offline northeast north_central* south
## 5 9 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1* birth_year family_income* education gender ideo_homogeneity_offline northeast north_central* south
## 5 10 substantive_ideology_scale_wave_1* percent_co_party friends_count_wave_1* birth_year* family_income education gender* ideo_homogeneity_offline northeast* north_central south*
## 5 11 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1* birth_year* family_income* education gender* ideo_homogeneity_offline northeast north_central south
## 5 12 substantive_ideology_scale_wave_1 percent_co_party* friends_count_wave_1* birth_year family_income* education* gender* ideo_homogeneity_offline* northeast north_central* south
## 5 13 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1 birth_year* family_income* education gender ideo_homogeneity_offline northeast north_central south*
## 5 14 substantive_ideology_scale_wave_1* percent_co_party* friends_count_wave_1* birth_year family_income* education* gender ideo_homogeneity_offline northeast north_central south
## 5 15 substantive_ideology_scale_wave_1 percent_co_party friends_count_wave_1* birth_year* family_income education gender* ideo_homogeneity_offline northeast* north_central south*
## * Please inspect the loggedEvents
## Warning: Number of logged events: 1214
imputed_data <- mice::complete(imputed_data,action=15)
twitter_data$strong_partisan_wave_1 <- ifelse(twitter_data$party_id_weak_strong_wave_1 == 1 | twitter_data$party_id_weak_strong_wave_1 == 7, 1, 0)
#reassemble dataaset with additional variables we need for subsequent analysis
to_bind<-twitter_data[,c("treat",
"perfect_complier",
"half_complier",
"bot_followers",
"party_id_wave_1",
"strong_partisan_wave_1",
"substantive_ideology_scale_wave_5",
"endtime_wave_5")]
final_data<- cbind(to_bind, imputed_data)
save(final_data, file="Final Data for Models.Rdata")
3. ITT Effects
This section calculates the intent to treat effects using the Lasso model to select covariates.
ITT set up
################################################################
## ITT
################################################################
## LASSO
library(amlinear)
library(ivpack)
## Loading required package: AER
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: survival
# regress treat on interaction variables (seperately
# for full compliers, half compliers, and bot followers)
# * does it make sense to include news, party, and strength rather than bin?*
df.t <- df[,c("W",
"substantive_ideology_scale_wave_1",
"percent_co_party",
"friends_count_wave_1",
"birth_year",
"family_income",
"education",
"gender",
"ideo_homogeneity_offline",
"northeast",
"north_central",
"south",
"strong_partisan_wave_1",
"freq_twitter_wave_1",
"party_id_wave_1",
"political_wave_1")]
# replace 0s in W with -1
df.t$W = df.t$W -1*(1-df.t$W)
df.t.int <- model.matrix(~ . * ., data = df.t)
df.t.2 <- sapply(df.t[,c("percent_co_party","ideo_homogeneity_offline",
"friends_count_wave_1","education","family_income",
"birth_year","substantive_ideology_scale_wave_1")],
function(x) x^2)
df.t.mod <- cbind(df.t.int,df.t.2)
ITT Models
# try LASSO model to select instruments
library(glmnet)
# create hyperparameter grid
hyper_grid <- expand.grid(
alpha = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
nfolds = c(5,10)
)
# create nonzero results grid
non_zero.t <- matrix(nrow = ncol(df.t.mod),ncol = 22)
for(i in 1:nrow(hyper_grid)){
lasso.fit.t <- cv.glmnet(df.t.mod, df$Y, weights = df$weights,
lambda = seq(0.001,0.1,by = 0.001),
alpha = hyper_grid$alpha[i],
nfolds = hyper_grid$nfolds[i])
coef.t <- coef(lasso.fit.t, s = "lambda.min")
coef.t[which(coef.t != 0)]
vars.t <- coef.t@Dimnames[[1]][which(coef.t != 0 ) ]
#print(vars.t) ### should do cool graph here
non_zero.t[,i] <- colnames(df.t.mod) %in% vars.t
}
# examine number of times each covariate shows up in models
rownames(non_zero.t) <- colnames(df.t.mod)
sort(apply(non_zero.t,1,sum))
## northeast:north_central
## 0
## northeast:south
## 0
## W
## 2
## percent_co_party
## 2
## friends_count_wave_1
## 2
## family_income
## 2
## gender
## 2
## northeast
## 2
## north_central
## 2
## south
## 2
## strong_partisan_wave_1
## 2
## freq_twitter_wave_1
## 2
## W:birth_year
## 2
## W:north_central
## 2
## W:south
## 2
## W:freq_twitter_wave_1
## 2
## W:political_wave_1
## 2
## substantive_ideology_scale_wave_1:friends_count_wave_1
## 2
## substantive_ideology_scale_wave_1:birth_year
## 2
## substantive_ideology_scale_wave_1:ideo_homogeneity_offline
## 2
## percent_co_party:family_income
## 2
## percent_co_party:gender
## 2
## percent_co_party:party_id_wave_1
## 2
## friends_count_wave_1:family_income
## 2
## friends_count_wave_1:education
## 2
## friends_count_wave_1:gender
## 2
## friends_count_wave_1:north_central
## 2
## friends_count_wave_1:south
## 2
## friends_count_wave_1:freq_twitter_wave_1
## 2
## friends_count_wave_1:party_id_wave_1
## 2
## friends_count_wave_1:political_wave_1
## 2
## birth_year:family_income
## 2
## birth_year:gender
## 2
## birth_year:ideo_homogeneity_offline
## 2
## birth_year:northeast
## 2
## birth_year:strong_partisan_wave_1
## 2
## birth_year:freq_twitter_wave_1
## 2
## birth_year:political_wave_1
## 2
## family_income:north_central
## 2
## family_income:party_id_wave_1
## 2
## family_income:political_wave_1
## 2
## education:gender
## 2
## education:northeast
## 2
## education:strong_partisan_wave_1
## 2
## education:freq_twitter_wave_1
## 2
## education:party_id_wave_1
## 2
## gender:ideo_homogeneity_offline
## 2
## gender:north_central
## 2
## gender:south
## 2
## gender:strong_partisan_wave_1
## 2
## gender:freq_twitter_wave_1
## 2
## gender:party_id_wave_1
## 2
## ideo_homogeneity_offline:south
## 2
## ideo_homogeneity_offline:freq_twitter_wave_1
## 2
## ideo_homogeneity_offline:party_id_wave_1
## 2
## ideo_homogeneity_offline:political_wave_1
## 2
## northeast:strong_partisan_wave_1
## 2
## northeast:freq_twitter_wave_1
## 2
## northeast:party_id_wave_1
## 2
## northeast:political_wave_1
## 2
## north_central:south
## 2
## north_central:strong_partisan_wave_1
## 2
## north_central:freq_twitter_wave_1
## 2
## north_central:political_wave_1
## 2
## south:party_id_wave_1
## 2
## strong_partisan_wave_1:freq_twitter_wave_1
## 2
## strong_partisan_wave_1:political_wave_1
## 2
## party_id_wave_1:political_wave_1
## 2
## percent_co_party
## 2
## friends_count_wave_1
## 2
## family_income
## 2
## W:gender
## 3
## W:ideo_homogeneity_offline
## 3
## substantive_ideology_scale_wave_1:political_wave_1
## 3
## friends_count_wave_1:birth_year
## 3
## friends_count_wave_1:northeast
## 3
## family_income:ideo_homogeneity_offline
## 3
## education:political_wave_1
## 3
## education
## 4
## W:northeast
## 4
## substantive_ideology_scale_wave_1:north_central
## 4
## percent_co_party:ideo_homogeneity_offline
## 4
## percent_co_party:freq_twitter_wave_1
## 4
## friends_count_wave_1:ideo_homogeneity_offline
## 4
## birth_year:north_central
## 4
## gender:political_wave_1
## 4
## north_central:party_id_wave_1
## 4
## education
## 4
## W:percent_co_party
## 5
## W:friends_count_wave_1
## 5
## W:family_income
## 5
## percent_co_party:education
## 5
## percent_co_party:northeast
## 5
## birth_year:south
## 5
## family_income:gender
## 5
## family_income:south
## 5
## family_income:strong_partisan_wave_1
## 5
## education:ideo_homogeneity_offline
## 5
## education:north_central
## 5
## ideo_homogeneity_offline:north_central
## 5
## ideo_homogeneity_offline:strong_partisan_wave_1
## 5
## south:strong_partisan_wave_1
## 5
## south:freq_twitter_wave_1
## 5
## W:substantive_ideology_scale_wave_1
## 6
## percent_co_party:friends_count_wave_1
## 6
## percent_co_party:political_wave_1
## 6
## birth_year:education
## 6
## birth_year:party_id_wave_1
## 6
## ideo_homogeneity_offline:northeast
## 6
## political_wave_1
## 7
## percent_co_party:south
## 7
## ideo_homogeneity_offline
## 8
## family_income:education
## 8
## ideo_homogeneity_offline
## 8
## W:education
## 9
## gender:northeast
## 9
## birth_year
## 11
## percent_co_party:north_central
## 11
## birth_year
## 11
## substantive_ideology_scale_wave_1:gender
## 13
## substantive_ideology_scale_wave_1:south
## 13
## education:south
## 13
## friends_count_wave_1:strong_partisan_wave_1
## 14
## family_income:freq_twitter_wave_1
## 14
## freq_twitter_wave_1:political_wave_1
## 14
## percent_co_party:birth_year
## 15
## W:strong_partisan_wave_1
## 16
## substantive_ideology_scale_wave_1:family_income
## 16
## substantive_ideology_scale_wave_1:northeast
## 17
## family_income:northeast
## 18
## south:political_wave_1
## 18
## W:party_id_wave_1
## 19
## substantive_ideology_scale_wave_1:percent_co_party
## 19
## percent_co_party:strong_partisan_wave_1
## 19
## substantive_ideology_scale_wave_1:freq_twitter_wave_1
## 20
## substantive_ideology_scale_wave_1:party_id_wave_1
## 20
## substantive_ideology_scale_wave_1:education
## 21
## (Intercept)
## 22
## substantive_ideology_scale_wave_1
## 22
## party_id_wave_1
## 22
## substantive_ideology_scale_wave_1:strong_partisan_wave_1
## 22
## strong_partisan_wave_1:party_id_wave_1
## 22
## freq_twitter_wave_1:party_id_wave_1
## 22
## substantive_ideology_scale_wave_1
## 22
df.lm <- data.frame(cbind(df.t.mod,df$Y,df$W,df$bin_maker))
# change non-treated values back to 0
df.lm$W = (df.t$W+1)/2
colnames(df.lm) <- c(colnames(df.t.mod),"Y","W","bin_maker")
democrats <- df.lm[df$party_id_wave_1 == 1,]
republicans <- df.lm[df$party_id_wave_1 == 2,]
# Qualitatively pick from vars.t and the sort function above
#Republicans
republican_ITT_int_model<-lm(Y~
#treatment assignment variable
W+
#pre-treatment ideology score
substantive_ideology_scale_wave_1+
#% of people followed on Twitter from same party
percent_co_party+
#demographics
birth_year +
family_income+
education+
northeast+
south+
family_income:northeast+
percent_co_party:birth_year+
# instead of factor, we broke bins into vars
substantive_ideology_scale_wave_1:education+
substantive_ideology_scale_wave_1:political_wave_1+
political_wave_1+
south+
south:political_wave_1+
strong_partisan_wave_1+
freq_twitter_wave_1,
data=republicans)
coefficients<-data.frame(summary(republican_ITT_int_model)$coefficients)
library(pander)
panderOptions('digits',3)
panderOptions('table.split.table', 300)
set.caption(tab_title("Intent-to-Treat Interactions Model (Republicans)"))
pander(coefficients)
Table 1: Intent-to-Treat Interactions Model (Republicans)
| (Intercept) |
3.37 |
0.144 |
23.5 |
4.83e-78 |
| W |
0.0913 |
0.0454 |
2.01 |
0.0449 |
| substantive_ideology_scale_wave_1 |
1.42 |
0.108 |
13.2 |
1.25e-33 |
| percent_co_party |
0.0374 |
0.0255 |
1.47 |
0.143 |
| birth_year |
0.00999 |
0.0308 |
0.324 |
0.746 |
| family_income |
0.00875 |
0.0272 |
0.322 |
0.748 |
| education |
-0.042 |
0.0386 |
-1.09 |
0.277 |
| northeast |
0.0231 |
0.0648 |
0.356 |
0.722 |
| south |
-0.0134 |
0.135 |
-0.0993 |
0.921 |
| political_wave_1 |
-0.00323 |
0.0906 |
-0.0356 |
0.972 |
| strong_partisan_wave_1 |
0.169 |
0.0477 |
3.55 |
0.000433 |
| freq_twitter_wave_1 |
0.0989 |
0.0451 |
2.19 |
0.0291 |
| family_income:northeast |
-0.0828 |
0.0541 |
-1.53 |
0.127 |
| percent_co_party:birth_year |
0.0164 |
0.0246 |
0.668 |
0.504 |
| substantive_ideology_scale_wave_1:education |
0.0513 |
0.033 |
1.56 |
0.121 |
| substantive_ideology_scale_wave_1:political_wave_1 |
-0.136 |
0.0806 |
-1.69 |
0.0927 |
| south:political_wave_1 |
0.000248 |
0.0964 |
0.00258 |
0.998 |
#Democrats
democrat_ITT_int_model<-lm(Y~
#treatment assignment variable
W+
#pre-treatment ideology score
substantive_ideology_scale_wave_1+
#% of people followed on Twitter from same party
percent_co_party+
#demographics
birth_year +
family_income+
education+
northeast+
south+
family_income:northeast+
percent_co_party:birth_year+
# instead of factor, we broke bins into vars
substantive_ideology_scale_wave_1:education+
substantive_ideology_scale_wave_1:political_wave_1+
political_wave_1+
south+
south:political_wave_1+
strong_partisan_wave_1+
freq_twitter_wave_1,
data=democrats)
coefficients<-data.frame(summary(democrat_ITT_int_model)$coefficients)
library(pander)
panderOptions('digits',3)
panderOptions('table.split.table', 300)
set.caption(tab_title("Intent-to-Treat Interactions Model (Democrats)"))
pander(coefficients)
Table 2: Intent-to-Treat Interactions Model (Democrats)
| (Intercept) |
3.02 |
0.108 |
27.9 |
1.33e-109 |
| W |
-0.0166 |
0.0325 |
-0.512 |
0.609 |
| substantive_ideology_scale_wave_1 |
1.38 |
0.0994 |
13.9 |
5.26e-38 |
| percent_co_party |
-0.00479 |
0.0239 |
-0.2 |
0.841 |
| birth_year |
0.0181 |
0.0216 |
0.836 |
0.404 |
| family_income |
0.00607 |
0.0194 |
0.313 |
0.754 |
| education |
0.0469 |
0.0251 |
1.87 |
0.0616 |
| northeast |
-0.0442 |
0.0429 |
-1.03 |
0.303 |
| south |
0.309 |
0.107 |
2.89 |
0.00395 |
| political_wave_1 |
0.242 |
0.0638 |
3.79 |
0.000164 |
| strong_partisan_wave_1 |
-0.0963 |
0.0374 |
-2.58 |
0.0102 |
| freq_twitter_wave_1 |
0.0066 |
0.0337 |
0.196 |
0.845 |
| family_income:northeast |
-0.0797 |
0.042 |
-1.9 |
0.0582 |
| percent_co_party:birth_year |
-0.0601 |
0.0246 |
-2.44 |
0.0148 |
| substantive_ideology_scale_wave_1:education |
0.0518 |
0.0289 |
1.79 |
0.0734 |
| substantive_ideology_scale_wave_1:political_wave_1 |
-0.108 |
0.0777 |
-1.39 |
0.164 |
| south:political_wave_1 |
-0.27 |
0.084 |
-3.21 |
0.00139 |
4. CACE
Complier Average Causal Effects are calculated using the Lasso model (to select covariates) and the Two Stage Least Squares model.
CACE set up
Lasso Democrats
###### LASSO FOR DEMOCRATS
# try LASSO model to select instruments
library(glmnet)
# create hyperparameter grid
hyper_grid <- expand.grid(
alpha = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
nfolds = c(5,10)
)
# create nonzero results grid
non_zero.x <- matrix(nrow = ncol(democrats),ncol = 22)
set.seed(1)
for(i in 1:nrow(hyper_grid)){
lasso.fit.x <- cv.glmnet(democrats, df$Y[dems], weights = df$weights[dems],
lambda = seq(0.001,0.1,by = 0.001),
alpha = hyper_grid$alpha[i],
nfolds = hyper_grid$nfolds[i])
coef.x <- coef(lasso.fit.x, s = "lambda.min")
coef.x[which(coef.x != 0)]
vars.x <- coef.x@Dimnames[[1]][which(coef.x != 0 ) ]
#print(vars.x) ### should do cool graph here
non_zero.x[,i] <- colnames(democrats) %in% vars.x
}
# later examine number of times each covariate shows up in models
## now we decide which variables to add
rownames(non_zero.x) <- colnames(df.x.mod)
sort(apply(non_zero.x,1,sum))
## party_id_wave_1
## 0
## northeast:north_central
## 0
## northeast:south
## 0
## percent_co_party
## 2
## friends_count_wave_1
## 2
## birth_year
## 2
## education
## 2
## ideo_homogeneity_offline
## 2
## northeast
## 2
## north_central
## 2
## south
## 2
## freq_twitter_wave_1
## 2
## substantive_ideology_scale_wave_1:percent_co_party
## 2
## substantive_ideology_scale_wave_1:friends_count_wave_1
## 2
## substantive_ideology_scale_wave_1:family_income
## 2
## substantive_ideology_scale_wave_1:strong_partisan_wave_1
## 2
## percent_co_party:education
## 2
## percent_co_party:gender
## 2
## percent_co_party:northeast
## 2
## percent_co_party:freq_twitter_wave_1
## 2
## percent_co_party:party_id_wave_1
## 2
## percent_co_party:political_wave_1
## 2
## friends_count_wave_1:birth_year
## 2
## friends_count_wave_1:family_income
## 2
## friends_count_wave_1:education
## 2
## friends_count_wave_1:ideo_homogeneity_offline
## 2
## friends_count_wave_1:northeast
## 2
## friends_count_wave_1:north_central
## 2
## friends_count_wave_1:south
## 2
## friends_count_wave_1:party_id_wave_1
## 2
## friends_count_wave_1:political_wave_1
## 2
## birth_year:family_income
## 2
## birth_year:northeast
## 2
## birth_year:south
## 2
## birth_year:strong_partisan_wave_1
## 2
## birth_year:party_id_wave_1
## 2
## birth_year:political_wave_1
## 2
## family_income:gender
## 2
## family_income:north_central
## 2
## family_income:strong_partisan_wave_1
## 2
## family_income:freq_twitter_wave_1
## 2
## family_income:political_wave_1
## 2
## education:gender
## 2
## education:ideo_homogeneity_offline
## 2
## education:north_central
## 2
## education:south
## 2
## education:freq_twitter_wave_1
## 2
## education:party_id_wave_1
## 2
## education:political_wave_1
## 2
## gender:ideo_homogeneity_offline
## 2
## gender:south
## 2
## gender:strong_partisan_wave_1
## 2
## gender:freq_twitter_wave_1
## 2
## ideo_homogeneity_offline:north_central
## 2
## ideo_homogeneity_offline:south
## 2
## ideo_homogeneity_offline:freq_twitter_wave_1
## 2
## ideo_homogeneity_offline:party_id_wave_1
## 2
## ideo_homogeneity_offline:political_wave_1
## 2
## northeast:freq_twitter_wave_1
## 2
## northeast:party_id_wave_1
## 2
## northeast:political_wave_1
## 2
## north_central:strong_partisan_wave_1
## 2
## north_central:freq_twitter_wave_1
## 2
## north_central:party_id_wave_1
## 2
## north_central:political_wave_1
## 2
## south:strong_partisan_wave_1
## 2
## south:party_id_wave_1
## 2
## strong_partisan_wave_1:freq_twitter_wave_1
## 2
## strong_partisan_wave_1:political_wave_1
## 2
## freq_twitter_wave_1:party_id_wave_1
## 2
## friends_count_wave_1.2
## 2
## gender
## 4
## substantive_ideology_scale_wave_1:birth_year
## 4
## substantive_ideology_scale_wave_1:education
## 4
## substantive_ideology_scale_wave_1:ideo_homogeneity_offline
## 4
## substantive_ideology_scale_wave_1:north_central
## 4
## substantive_ideology_scale_wave_1:south
## 4
## percent_co_party:friends_count_wave_1
## 4
## percent_co_party:north_central
## 4
## percent_co_party:south
## 4
## percent_co_party:strong_partisan_wave_1
## 4
## friends_count_wave_1:gender
## 4
## friends_count_wave_1:freq_twitter_wave_1
## 4
## birth_year:gender
## 4
## birth_year:ideo_homogeneity_offline
## 4
## birth_year:freq_twitter_wave_1
## 4
## family_income:education
## 4
## family_income:ideo_homogeneity_offline
## 4
## family_income:south
## 4
## education:northeast
## 4
## education:strong_partisan_wave_1
## 4
## gender:northeast
## 4
## gender:party_id_wave_1
## 4
## north_central:south
## 4
## percent_co_party.2
## 4
## birth_year.2
## 4
## family_income
## 6
## substantive_ideology_scale_wave_1:political_wave_1
## 6
## friends_count_wave_1:strong_partisan_wave_1
## 6
## birth_year:north_central
## 6
## family_income:northeast
## 6
## family_income:party_id_wave_1
## 6
## ideo_homogeneity_offline:northeast
## 6
## ideo_homogeneity_offline:strong_partisan_wave_1
## 6
## south:freq_twitter_wave_1
## 6
## freq_twitter_wave_1:political_wave_1
## 6
## family_income.2
## 6
## gender:north_central
## 11
## substantive_ideology_scale_wave_1:gender
## 12
## percent_co_party:family_income
## 12
## birth_year:education
## 12
## education.2
## 12
## substantive_ideology_scale_wave_1:freq_twitter_wave_1
## 14
## northeast:strong_partisan_wave_1
## 15
## percent_co_party:ideo_homogeneity_offline
## 18
## party_id_wave_1:political_wave_1
## 20
## (Intercept)
## 22
## substantive_ideology_scale_wave_1
## 22
## strong_partisan_wave_1
## 22
## political_wave_1
## 22
## substantive_ideology_scale_wave_1:northeast
## 22
## substantive_ideology_scale_wave_1:party_id_wave_1
## 22
## percent_co_party:birth_year
## 22
## gender:political_wave_1
## 22
## south:political_wave_1
## 22
## strong_partisan_wave_1:party_id_wave_1
## 22
## ideo_homogeneity_offline.2
## 22
## substantive_ideology_scale_wave_1.2
## 22
print(vars.x)
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "strong_partisan_wave_1"
## [4] "political_wave_1"
## [5] "substantive_ideology_scale_wave_1:northeast"
## [6] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [7] "percent_co_party:birth_year"
## [8] "percent_co_party:family_income"
## [9] "percent_co_party:ideo_homogeneity_offline"
## [10] "birth_year:education"
## [11] "gender:north_central"
## [12] "gender:political_wave_1"
## [13] "northeast:strong_partisan_wave_1"
## [14] "south:political_wave_1"
## [15] "strong_partisan_wave_1:party_id_wave_1"
## [16] "ideo_homogeneity_offline.2"
## [17] "education.2"
## [18] "substantive_ideology_scale_wave_1.2"
to_include_dems <- c(vars.x[-1]) # can add variables of importance here
Lasso Republicans
###### LASSO FOR REPUBLICANS
# try LASSO model to select instruments
library(glmnet)
# create hyperparameter grid
hyper_grid <- expand.grid(
alpha = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1),
nfolds = c(5,10)
)
# create nonzero results grid
non_zero.x <- matrix(nrow = ncol(republicans),ncol = 22)
set.seed(1)
for(i in 1:nrow(hyper_grid)){
lasso.fit.x <- cv.glmnet(republicans, df$Y[gop], weights = df$weights[gop],
lambda = seq(0.001,0.1,by = 0.001),
alpha = hyper_grid$alpha[i],
nfolds = hyper_grid$nfolds[i])
coef.x <- coef(lasso.fit.x, s = "lambda.min")
coef.x[which(coef.x != 0)]
vars.x <- coef.x@Dimnames[[1]][which(coef.x != 0 ) ]
print(vars.x) ### should do cool graph here
non_zero.x[,i] <- colnames(democrats) %in% vars.x
}
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "friends_count_wave_1"
## [5] "birth_year"
## [6] "family_income"
## [7] "education"
## [8] "gender"
## [9] "ideo_homogeneity_offline"
## [10] "northeast"
## [11] "north_central"
## [12] "south"
## [13] "strong_partisan_wave_1"
## [14] "freq_twitter_wave_1"
## [15] "political_wave_1"
## [16] "substantive_ideology_scale_wave_1:percent_co_party"
## [17] "substantive_ideology_scale_wave_1:friends_count_wave_1"
## [18] "substantive_ideology_scale_wave_1:birth_year"
## [19] "substantive_ideology_scale_wave_1:family_income"
## [20] "substantive_ideology_scale_wave_1:education"
## [21] "substantive_ideology_scale_wave_1:gender"
## [22] "substantive_ideology_scale_wave_1:ideo_homogeneity_offline"
## [23] "substantive_ideology_scale_wave_1:northeast"
## [24] "substantive_ideology_scale_wave_1:north_central"
## [25] "substantive_ideology_scale_wave_1:south"
## [26] "substantive_ideology_scale_wave_1:strong_partisan_wave_1"
## [27] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [28] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [29] "substantive_ideology_scale_wave_1:political_wave_1"
## [30] "percent_co_party:friends_count_wave_1"
## [31] "percent_co_party:birth_year"
## [32] "percent_co_party:family_income"
## [33] "percent_co_party:education"
## [34] "percent_co_party:gender"
## [35] "percent_co_party:ideo_homogeneity_offline"
## [36] "percent_co_party:northeast"
## [37] "percent_co_party:north_central"
## [38] "percent_co_party:south"
## [39] "percent_co_party:strong_partisan_wave_1"
## [40] "percent_co_party:freq_twitter_wave_1"
## [41] "percent_co_party:party_id_wave_1"
## [42] "percent_co_party:political_wave_1"
## [43] "friends_count_wave_1:birth_year"
## [44] "friends_count_wave_1:family_income"
## [45] "friends_count_wave_1:education"
## [46] "friends_count_wave_1:gender"
## [47] "friends_count_wave_1:ideo_homogeneity_offline"
## [48] "friends_count_wave_1:northeast"
## [49] "friends_count_wave_1:north_central"
## [50] "friends_count_wave_1:south"
## [51] "friends_count_wave_1:strong_partisan_wave_1"
## [52] "friends_count_wave_1:freq_twitter_wave_1"
## [53] "friends_count_wave_1:party_id_wave_1"
## [54] "friends_count_wave_1:political_wave_1"
## [55] "birth_year:family_income"
## [56] "birth_year:education"
## [57] "birth_year:gender"
## [58] "birth_year:ideo_homogeneity_offline"
## [59] "birth_year:northeast"
## [60] "birth_year:north_central"
## [61] "birth_year:south"
## [62] "birth_year:strong_partisan_wave_1"
## [63] "birth_year:freq_twitter_wave_1"
## [64] "birth_year:party_id_wave_1"
## [65] "birth_year:political_wave_1"
## [66] "family_income:education"
## [67] "family_income:gender"
## [68] "family_income:ideo_homogeneity_offline"
## [69] "family_income:northeast"
## [70] "family_income:north_central"
## [71] "family_income:south"
## [72] "family_income:strong_partisan_wave_1"
## [73] "family_income:freq_twitter_wave_1"
## [74] "family_income:party_id_wave_1"
## [75] "family_income:political_wave_1"
## [76] "education:gender"
## [77] "education:ideo_homogeneity_offline"
## [78] "education:northeast"
## [79] "education:north_central"
## [80] "education:south"
## [81] "education:strong_partisan_wave_1"
## [82] "education:freq_twitter_wave_1"
## [83] "education:party_id_wave_1"
## [84] "education:political_wave_1"
## [85] "gender:ideo_homogeneity_offline"
## [86] "gender:northeast"
## [87] "gender:north_central"
## [88] "gender:south"
## [89] "gender:strong_partisan_wave_1"
## [90] "gender:freq_twitter_wave_1"
## [91] "gender:party_id_wave_1"
## [92] "gender:political_wave_1"
## [93] "ideo_homogeneity_offline:northeast"
## [94] "ideo_homogeneity_offline:north_central"
## [95] "ideo_homogeneity_offline:south"
## [96] "ideo_homogeneity_offline:strong_partisan_wave_1"
## [97] "ideo_homogeneity_offline:freq_twitter_wave_1"
## [98] "ideo_homogeneity_offline:party_id_wave_1"
## [99] "ideo_homogeneity_offline:political_wave_1"
## [100] "northeast:strong_partisan_wave_1"
## [101] "northeast:freq_twitter_wave_1"
## [102] "northeast:party_id_wave_1"
## [103] "northeast:political_wave_1"
## [104] "north_central:south"
## [105] "north_central:strong_partisan_wave_1"
## [106] "north_central:freq_twitter_wave_1"
## [107] "north_central:party_id_wave_1"
## [108] "north_central:political_wave_1"
## [109] "south:strong_partisan_wave_1"
## [110] "south:freq_twitter_wave_1"
## [111] "south:party_id_wave_1"
## [112] "south:political_wave_1"
## [113] "strong_partisan_wave_1:freq_twitter_wave_1"
## [114] "strong_partisan_wave_1:party_id_wave_1"
## [115] "strong_partisan_wave_1:political_wave_1"
## [116] "freq_twitter_wave_1:party_id_wave_1"
## [117] "freq_twitter_wave_1:political_wave_1"
## [118] "party_id_wave_1:political_wave_1"
## [119] "percent_co_party.2"
## [120] "ideo_homogeneity_offline.2"
## [121] "friends_count_wave_1.2"
## [122] "education.2"
## [123] "family_income.2"
## [124] "birth_year.2"
## [125] "substantive_ideology_scale_wave_1.2"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "strong_partisan_wave_1"
## [5] "freq_twitter_wave_1"
## [6] "political_wave_1"
## [7] "substantive_ideology_scale_wave_1:family_income"
## [8] "substantive_ideology_scale_wave_1:education"
## [9] "substantive_ideology_scale_wave_1:gender"
## [10] "substantive_ideology_scale_wave_1:ideo_homogeneity_offline"
## [11] "substantive_ideology_scale_wave_1:south"
## [12] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [13] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [14] "substantive_ideology_scale_wave_1:political_wave_1"
## [15] "percent_co_party:friends_count_wave_1"
## [16] "percent_co_party:education"
## [17] "percent_co_party:gender"
## [18] "percent_co_party:ideo_homogeneity_offline"
## [19] "percent_co_party:north_central"
## [20] "percent_co_party:party_id_wave_1"
## [21] "percent_co_party:political_wave_1"
## [22] "friends_count_wave_1:family_income"
## [23] "friends_count_wave_1:strong_partisan_wave_1"
## [24] "birth_year:northeast"
## [25] "birth_year:south"
## [26] "birth_year:strong_partisan_wave_1"
## [27] "birth_year:political_wave_1"
## [28] "family_income:education"
## [29] "family_income:gender"
## [30] "family_income:ideo_homogeneity_offline"
## [31] "family_income:northeast"
## [32] "family_income:north_central"
## [33] "family_income:strong_partisan_wave_1"
## [34] "family_income:freq_twitter_wave_1"
## [35] "education:northeast"
## [36] "education:north_central"
## [37] "education:south"
## [38] "education:strong_partisan_wave_1"
## [39] "gender:south"
## [40] "gender:political_wave_1"
## [41] "ideo_homogeneity_offline:north_central"
## [42] "ideo_homogeneity_offline:strong_partisan_wave_1"
## [43] "northeast:strong_partisan_wave_1"
## [44] "north_central:south"
## [45] "south:strong_partisan_wave_1"
## [46] "south:freq_twitter_wave_1"
## [47] "south:political_wave_1"
## [48] "strong_partisan_wave_1:freq_twitter_wave_1"
## [49] "strong_partisan_wave_1:party_id_wave_1"
## [50] "strong_partisan_wave_1:political_wave_1"
## [51] "freq_twitter_wave_1:party_id_wave_1"
## [52] "party_id_wave_1:political_wave_1"
## [53] "substantive_ideology_scale_wave_1.2"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "strong_partisan_wave_1"
## [5] "political_wave_1"
## [6] "substantive_ideology_scale_wave_1:family_income"
## [7] "substantive_ideology_scale_wave_1:education"
## [8] "substantive_ideology_scale_wave_1:gender"
## [9] "substantive_ideology_scale_wave_1:south"
## [10] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [11] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [12] "percent_co_party:education"
## [13] "percent_co_party:north_central"
## [14] "percent_co_party:south"
## [15] "percent_co_party:party_id_wave_1"
## [16] "percent_co_party:political_wave_1"
## [17] "birth_year:northeast"
## [18] "birth_year:political_wave_1"
## [19] "family_income:education"
## [20] "family_income:gender"
## [21] "family_income:ideo_homogeneity_offline"
## [22] "family_income:northeast"
## [23] "family_income:freq_twitter_wave_1"
## [24] "education:northeast"
## [25] "gender:south"
## [26] "gender:political_wave_1"
## [27] "northeast:strong_partisan_wave_1"
## [28] "strong_partisan_wave_1:freq_twitter_wave_1"
## [29] "strong_partisan_wave_1:party_id_wave_1"
## [30] "party_id_wave_1:political_wave_1"
## [31] "substantive_ideology_scale_wave_1.2"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "strong_partisan_wave_1"
## [5] "political_wave_1"
## [6] "substantive_ideology_scale_wave_1:family_income"
## [7] "substantive_ideology_scale_wave_1:education"
## [8] "substantive_ideology_scale_wave_1:gender"
## [9] "substantive_ideology_scale_wave_1:south"
## [10] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [11] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [12] "percent_co_party:south"
## [13] "percent_co_party:party_id_wave_1"
## [14] "percent_co_party:political_wave_1"
## [15] "birth_year:northeast"
## [16] "birth_year:political_wave_1"
## [17] "family_income:ideo_homogeneity_offline"
## [18] "family_income:northeast"
## [19] "northeast:strong_partisan_wave_1"
## [20] "strong_partisan_wave_1:freq_twitter_wave_1"
## [21] "strong_partisan_wave_1:party_id_wave_1"
## [22] "party_id_wave_1:political_wave_1"
## [23] "substantive_ideology_scale_wave_1.2"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "strong_partisan_wave_1"
## [5] "political_wave_1"
## [6] "substantive_ideology_scale_wave_1:family_income"
## [7] "substantive_ideology_scale_wave_1:education"
## [8] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [9] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [10] "percent_co_party:south"
## [11] "percent_co_party:party_id_wave_1"
## [12] "percent_co_party:political_wave_1"
## [13] "birth_year:northeast"
## [14] "birth_year:political_wave_1"
## [15] "family_income:ideo_homogeneity_offline"
## [16] "family_income:northeast"
## [17] "northeast:strong_partisan_wave_1"
## [18] "strong_partisan_wave_1:freq_twitter_wave_1"
## [19] "strong_partisan_wave_1:party_id_wave_1"
## [20] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "strong_partisan_wave_1"
## [5] "political_wave_1"
## [6] "substantive_ideology_scale_wave_1:family_income"
## [7] "substantive_ideology_scale_wave_1:education"
## [8] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [9] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [10] "percent_co_party:south"
## [11] "percent_co_party:party_id_wave_1"
## [12] "percent_co_party:political_wave_1"
## [13] "birth_year:northeast"
## [14] "birth_year:political_wave_1"
## [15] "family_income:ideo_homogeneity_offline"
## [16] "family_income:northeast"
## [17] "northeast:strong_partisan_wave_1"
## [18] "strong_partisan_wave_1:freq_twitter_wave_1"
## [19] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "political_wave_1"
## [5] "substantive_ideology_scale_wave_1:family_income"
## [6] "substantive_ideology_scale_wave_1:education"
## [7] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [8] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [9] "percent_co_party:south"
## [10] "percent_co_party:party_id_wave_1"
## [11] "percent_co_party:political_wave_1"
## [12] "birth_year:northeast"
## [13] "birth_year:political_wave_1"
## [14] "family_income:ideo_homogeneity_offline"
## [15] "northeast:strong_partisan_wave_1"
## [16] "strong_partisan_wave_1:freq_twitter_wave_1"
## [17] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "political_wave_1"
## [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [5] "percent_co_party:south"
## [6] "percent_co_party:political_wave_1"
## [7] "birth_year:northeast"
## [8] "birth_year:political_wave_1"
## [9] "strong_partisan_wave_1:freq_twitter_wave_1"
## [10] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "political_wave_1"
## [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [5] "percent_co_party:political_wave_1"
## [6] "birth_year:northeast"
## [7] "birth_year:political_wave_1"
## [8] "strong_partisan_wave_1:freq_twitter_wave_1"
## [9] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "political_wave_1"
## [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [5] "percent_co_party:south"
## [6] "percent_co_party:political_wave_1"
## [7] "birth_year:northeast"
## [8] "strong_partisan_wave_1:freq_twitter_wave_1"
## [9] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "political_wave_1"
## [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [5] "percent_co_party:south"
## [6] "percent_co_party:political_wave_1"
## [7] "birth_year:northeast"
## [8] "strong_partisan_wave_1:freq_twitter_wave_1"
## [9] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "friends_count_wave_1"
## [5] "birth_year"
## [6] "family_income"
## [7] "education"
## [8] "gender"
## [9] "ideo_homogeneity_offline"
## [10] "northeast"
## [11] "north_central"
## [12] "south"
## [13] "strong_partisan_wave_1"
## [14] "freq_twitter_wave_1"
## [15] "political_wave_1"
## [16] "substantive_ideology_scale_wave_1:percent_co_party"
## [17] "substantive_ideology_scale_wave_1:friends_count_wave_1"
## [18] "substantive_ideology_scale_wave_1:birth_year"
## [19] "substantive_ideology_scale_wave_1:family_income"
## [20] "substantive_ideology_scale_wave_1:education"
## [21] "substantive_ideology_scale_wave_1:gender"
## [22] "substantive_ideology_scale_wave_1:ideo_homogeneity_offline"
## [23] "substantive_ideology_scale_wave_1:northeast"
## [24] "substantive_ideology_scale_wave_1:north_central"
## [25] "substantive_ideology_scale_wave_1:south"
## [26] "substantive_ideology_scale_wave_1:strong_partisan_wave_1"
## [27] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [28] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [29] "substantive_ideology_scale_wave_1:political_wave_1"
## [30] "percent_co_party:friends_count_wave_1"
## [31] "percent_co_party:birth_year"
## [32] "percent_co_party:family_income"
## [33] "percent_co_party:education"
## [34] "percent_co_party:gender"
## [35] "percent_co_party:ideo_homogeneity_offline"
## [36] "percent_co_party:northeast"
## [37] "percent_co_party:north_central"
## [38] "percent_co_party:south"
## [39] "percent_co_party:strong_partisan_wave_1"
## [40] "percent_co_party:freq_twitter_wave_1"
## [41] "percent_co_party:party_id_wave_1"
## [42] "percent_co_party:political_wave_1"
## [43] "friends_count_wave_1:birth_year"
## [44] "friends_count_wave_1:family_income"
## [45] "friends_count_wave_1:education"
## [46] "friends_count_wave_1:gender"
## [47] "friends_count_wave_1:ideo_homogeneity_offline"
## [48] "friends_count_wave_1:northeast"
## [49] "friends_count_wave_1:north_central"
## [50] "friends_count_wave_1:south"
## [51] "friends_count_wave_1:strong_partisan_wave_1"
## [52] "friends_count_wave_1:freq_twitter_wave_1"
## [53] "friends_count_wave_1:party_id_wave_1"
## [54] "friends_count_wave_1:political_wave_1"
## [55] "birth_year:family_income"
## [56] "birth_year:education"
## [57] "birth_year:gender"
## [58] "birth_year:ideo_homogeneity_offline"
## [59] "birth_year:northeast"
## [60] "birth_year:north_central"
## [61] "birth_year:south"
## [62] "birth_year:strong_partisan_wave_1"
## [63] "birth_year:freq_twitter_wave_1"
## [64] "birth_year:party_id_wave_1"
## [65] "birth_year:political_wave_1"
## [66] "family_income:education"
## [67] "family_income:gender"
## [68] "family_income:ideo_homogeneity_offline"
## [69] "family_income:northeast"
## [70] "family_income:north_central"
## [71] "family_income:south"
## [72] "family_income:strong_partisan_wave_1"
## [73] "family_income:freq_twitter_wave_1"
## [74] "family_income:party_id_wave_1"
## [75] "family_income:political_wave_1"
## [76] "education:gender"
## [77] "education:ideo_homogeneity_offline"
## [78] "education:northeast"
## [79] "education:north_central"
## [80] "education:south"
## [81] "education:strong_partisan_wave_1"
## [82] "education:freq_twitter_wave_1"
## [83] "education:party_id_wave_1"
## [84] "education:political_wave_1"
## [85] "gender:ideo_homogeneity_offline"
## [86] "gender:northeast"
## [87] "gender:north_central"
## [88] "gender:south"
## [89] "gender:strong_partisan_wave_1"
## [90] "gender:freq_twitter_wave_1"
## [91] "gender:party_id_wave_1"
## [92] "gender:political_wave_1"
## [93] "ideo_homogeneity_offline:northeast"
## [94] "ideo_homogeneity_offline:north_central"
## [95] "ideo_homogeneity_offline:south"
## [96] "ideo_homogeneity_offline:strong_partisan_wave_1"
## [97] "ideo_homogeneity_offline:freq_twitter_wave_1"
## [98] "ideo_homogeneity_offline:party_id_wave_1"
## [99] "ideo_homogeneity_offline:political_wave_1"
## [100] "northeast:strong_partisan_wave_1"
## [101] "northeast:freq_twitter_wave_1"
## [102] "northeast:party_id_wave_1"
## [103] "northeast:political_wave_1"
## [104] "north_central:south"
## [105] "north_central:strong_partisan_wave_1"
## [106] "north_central:freq_twitter_wave_1"
## [107] "north_central:party_id_wave_1"
## [108] "north_central:political_wave_1"
## [109] "south:strong_partisan_wave_1"
## [110] "south:freq_twitter_wave_1"
## [111] "south:party_id_wave_1"
## [112] "south:political_wave_1"
## [113] "strong_partisan_wave_1:freq_twitter_wave_1"
## [114] "strong_partisan_wave_1:party_id_wave_1"
## [115] "strong_partisan_wave_1:political_wave_1"
## [116] "freq_twitter_wave_1:party_id_wave_1"
## [117] "freq_twitter_wave_1:political_wave_1"
## [118] "party_id_wave_1:political_wave_1"
## [119] "percent_co_party.2"
## [120] "ideo_homogeneity_offline.2"
## [121] "friends_count_wave_1.2"
## [122] "education.2"
## [123] "family_income.2"
## [124] "birth_year.2"
## [125] "substantive_ideology_scale_wave_1.2"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "strong_partisan_wave_1"
## [5] "freq_twitter_wave_1"
## [6] "political_wave_1"
## [7] "substantive_ideology_scale_wave_1:family_income"
## [8] "substantive_ideology_scale_wave_1:education"
## [9] "substantive_ideology_scale_wave_1:gender"
## [10] "substantive_ideology_scale_wave_1:ideo_homogeneity_offline"
## [11] "substantive_ideology_scale_wave_1:south"
## [12] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [13] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [14] "substantive_ideology_scale_wave_1:political_wave_1"
## [15] "percent_co_party:friends_count_wave_1"
## [16] "percent_co_party:education"
## [17] "percent_co_party:gender"
## [18] "percent_co_party:ideo_homogeneity_offline"
## [19] "percent_co_party:north_central"
## [20] "percent_co_party:party_id_wave_1"
## [21] "percent_co_party:political_wave_1"
## [22] "friends_count_wave_1:family_income"
## [23] "friends_count_wave_1:strong_partisan_wave_1"
## [24] "birth_year:northeast"
## [25] "birth_year:south"
## [26] "birth_year:strong_partisan_wave_1"
## [27] "birth_year:political_wave_1"
## [28] "family_income:education"
## [29] "family_income:gender"
## [30] "family_income:ideo_homogeneity_offline"
## [31] "family_income:northeast"
## [32] "family_income:north_central"
## [33] "family_income:strong_partisan_wave_1"
## [34] "family_income:freq_twitter_wave_1"
## [35] "education:northeast"
## [36] "education:north_central"
## [37] "education:south"
## [38] "education:strong_partisan_wave_1"
## [39] "gender:south"
## [40] "gender:political_wave_1"
## [41] "ideo_homogeneity_offline:north_central"
## [42] "ideo_homogeneity_offline:strong_partisan_wave_1"
## [43] "northeast:strong_partisan_wave_1"
## [44] "north_central:south"
## [45] "south:strong_partisan_wave_1"
## [46] "south:freq_twitter_wave_1"
## [47] "south:political_wave_1"
## [48] "strong_partisan_wave_1:freq_twitter_wave_1"
## [49] "strong_partisan_wave_1:party_id_wave_1"
## [50] "strong_partisan_wave_1:political_wave_1"
## [51] "freq_twitter_wave_1:party_id_wave_1"
## [52] "party_id_wave_1:political_wave_1"
## [53] "substantive_ideology_scale_wave_1.2"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "strong_partisan_wave_1"
## [5] "political_wave_1"
## [6] "substantive_ideology_scale_wave_1:family_income"
## [7] "substantive_ideology_scale_wave_1:education"
## [8] "substantive_ideology_scale_wave_1:gender"
## [9] "substantive_ideology_scale_wave_1:south"
## [10] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [11] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [12] "percent_co_party:education"
## [13] "percent_co_party:north_central"
## [14] "percent_co_party:south"
## [15] "percent_co_party:party_id_wave_1"
## [16] "percent_co_party:political_wave_1"
## [17] "birth_year:northeast"
## [18] "birth_year:political_wave_1"
## [19] "family_income:education"
## [20] "family_income:gender"
## [21] "family_income:ideo_homogeneity_offline"
## [22] "family_income:northeast"
## [23] "family_income:freq_twitter_wave_1"
## [24] "education:northeast"
## [25] "gender:south"
## [26] "gender:political_wave_1"
## [27] "northeast:strong_partisan_wave_1"
## [28] "strong_partisan_wave_1:freq_twitter_wave_1"
## [29] "strong_partisan_wave_1:party_id_wave_1"
## [30] "party_id_wave_1:political_wave_1"
## [31] "substantive_ideology_scale_wave_1.2"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "strong_partisan_wave_1"
## [5] "political_wave_1"
## [6] "substantive_ideology_scale_wave_1:family_income"
## [7] "substantive_ideology_scale_wave_1:education"
## [8] "substantive_ideology_scale_wave_1:south"
## [9] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [10] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [11] "percent_co_party:south"
## [12] "percent_co_party:party_id_wave_1"
## [13] "percent_co_party:political_wave_1"
## [14] "birth_year:northeast"
## [15] "birth_year:political_wave_1"
## [16] "family_income:education"
## [17] "family_income:ideo_homogeneity_offline"
## [18] "family_income:northeast"
## [19] "family_income:freq_twitter_wave_1"
## [20] "education:northeast"
## [21] "northeast:strong_partisan_wave_1"
## [22] "strong_partisan_wave_1:freq_twitter_wave_1"
## [23] "strong_partisan_wave_1:party_id_wave_1"
## [24] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "strong_partisan_wave_1"
## [5] "political_wave_1"
## [6] "substantive_ideology_scale_wave_1:family_income"
## [7] "substantive_ideology_scale_wave_1:education"
## [8] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [9] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [10] "percent_co_party:south"
## [11] "percent_co_party:party_id_wave_1"
## [12] "percent_co_party:political_wave_1"
## [13] "birth_year:northeast"
## [14] "birth_year:political_wave_1"
## [15] "family_income:ideo_homogeneity_offline"
## [16] "northeast:strong_partisan_wave_1"
## [17] "strong_partisan_wave_1:freq_twitter_wave_1"
## [18] "strong_partisan_wave_1:party_id_wave_1"
## [19] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "strong_partisan_wave_1"
## [5] "political_wave_1"
## [6] "substantive_ideology_scale_wave_1:family_income"
## [7] "substantive_ideology_scale_wave_1:education"
## [8] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [9] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [10] "percent_co_party:south"
## [11] "percent_co_party:party_id_wave_1"
## [12] "percent_co_party:political_wave_1"
## [13] "birth_year:northeast"
## [14] "birth_year:political_wave_1"
## [15] "family_income:ideo_homogeneity_offline"
## [16] "strong_partisan_wave_1:freq_twitter_wave_1"
## [17] "strong_partisan_wave_1:party_id_wave_1"
## [18] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "political_wave_1"
## [5] "substantive_ideology_scale_wave_1:family_income"
## [6] "substantive_ideology_scale_wave_1:education"
## [7] "substantive_ideology_scale_wave_1:freq_twitter_wave_1"
## [8] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [9] "percent_co_party:south"
## [10] "percent_co_party:party_id_wave_1"
## [11] "percent_co_party:political_wave_1"
## [12] "birth_year:northeast"
## [13] "birth_year:political_wave_1"
## [14] "family_income:ideo_homogeneity_offline"
## [15] "strong_partisan_wave_1:freq_twitter_wave_1"
## [16] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "percent_co_party"
## [4] "political_wave_1"
## [5] "substantive_ideology_scale_wave_1:education"
## [6] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [7] "percent_co_party:south"
## [8] "percent_co_party:party_id_wave_1"
## [9] "percent_co_party:political_wave_1"
## [10] "birth_year:northeast"
## [11] "birth_year:political_wave_1"
## [12] "strong_partisan_wave_1:freq_twitter_wave_1"
## [13] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "political_wave_1"
## [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [5] "percent_co_party:south"
## [6] "percent_co_party:political_wave_1"
## [7] "birth_year:northeast"
## [8] "birth_year:political_wave_1"
## [9] "strong_partisan_wave_1:freq_twitter_wave_1"
## [10] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "political_wave_1"
## [4] "substantive_ideology_scale_wave_1:education"
## [5] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [6] "percent_co_party:south"
## [7] "percent_co_party:political_wave_1"
## [8] "birth_year:northeast"
## [9] "birth_year:political_wave_1"
## [10] "family_income:ideo_homogeneity_offline"
## [11] "strong_partisan_wave_1:freq_twitter_wave_1"
## [12] "party_id_wave_1:political_wave_1"
## [1] "(Intercept)"
## [2] "substantive_ideology_scale_wave_1"
## [3] "political_wave_1"
## [4] "substantive_ideology_scale_wave_1:party_id_wave_1"
## [5] "percent_co_party:political_wave_1"
## [6] "strong_partisan_wave_1:freq_twitter_wave_1"
## [7] "party_id_wave_1:political_wave_1"
# later examine number of times each covariate shows up in models
## now we decide which variables to add
rownames(non_zero.x) <- colnames(df.x.mod)
sort(apply(non_zero.x,1,sum))
## party_id_wave_1
## 0
## northeast:north_central
## 0
## northeast:south
## 0
## friends_count_wave_1
## 2
## birth_year
## 2
## family_income
## 2
## education
## 2
## gender
## 2
## ideo_homogeneity_offline
## 2
## northeast
## 2
## north_central
## 2
## south
## 2
## substantive_ideology_scale_wave_1:percent_co_party
## 2
## substantive_ideology_scale_wave_1:friends_count_wave_1
## 2
## substantive_ideology_scale_wave_1:birth_year
## 2
## substantive_ideology_scale_wave_1:northeast
## 2
## substantive_ideology_scale_wave_1:north_central
## 2
## substantive_ideology_scale_wave_1:strong_partisan_wave_1
## 2
## percent_co_party:birth_year
## 2
## percent_co_party:family_income
## 2
## percent_co_party:northeast
## 2
## percent_co_party:strong_partisan_wave_1
## 2
## percent_co_party:freq_twitter_wave_1
## 2
## friends_count_wave_1:birth_year
## 2
## friends_count_wave_1:education
## 2
## friends_count_wave_1:gender
## 2
## friends_count_wave_1:ideo_homogeneity_offline
## 2
## friends_count_wave_1:northeast
## 2
## friends_count_wave_1:north_central
## 2
## friends_count_wave_1:south
## 2
## friends_count_wave_1:freq_twitter_wave_1
## 2
## friends_count_wave_1:party_id_wave_1
## 2
## friends_count_wave_1:political_wave_1
## 2
## birth_year:family_income
## 2
## birth_year:education
## 2
## birth_year:gender
## 2
## birth_year:ideo_homogeneity_offline
## 2
## birth_year:north_central
## 2
## birth_year:freq_twitter_wave_1
## 2
## birth_year:party_id_wave_1
## 2
## family_income:south
## 2
## family_income:party_id_wave_1
## 2
## family_income:political_wave_1
## 2
## education:gender
## 2
## education:ideo_homogeneity_offline
## 2
## education:freq_twitter_wave_1
## 2
## education:party_id_wave_1
## 2
## education:political_wave_1
## 2
## gender:ideo_homogeneity_offline
## 2
## gender:northeast
## 2
## gender:north_central
## 2
## gender:strong_partisan_wave_1
## 2
## gender:freq_twitter_wave_1
## 2
## gender:party_id_wave_1
## 2
## ideo_homogeneity_offline:northeast
## 2
## ideo_homogeneity_offline:south
## 2
## ideo_homogeneity_offline:freq_twitter_wave_1
## 2
## ideo_homogeneity_offline:party_id_wave_1
## 2
## ideo_homogeneity_offline:political_wave_1
## 2
## northeast:freq_twitter_wave_1
## 2
## northeast:party_id_wave_1
## 2
## northeast:political_wave_1
## 2
## north_central:strong_partisan_wave_1
## 2
## north_central:freq_twitter_wave_1
## 2
## north_central:party_id_wave_1
## 2
## north_central:political_wave_1
## 2
## south:party_id_wave_1
## 2
## freq_twitter_wave_1:political_wave_1
## 2
## percent_co_party.2
## 2
## ideo_homogeneity_offline.2
## 2
## friends_count_wave_1.2
## 2
## education.2
## 2
## family_income.2
## 2
## birth_year.2
## 2
## freq_twitter_wave_1
## 4
## substantive_ideology_scale_wave_1:ideo_homogeneity_offline
## 4
## substantive_ideology_scale_wave_1:political_wave_1
## 4
## percent_co_party:friends_count_wave_1
## 4
## percent_co_party:gender
## 4
## percent_co_party:ideo_homogeneity_offline
## 4
## friends_count_wave_1:family_income
## 4
## friends_count_wave_1:strong_partisan_wave_1
## 4
## birth_year:south
## 4
## birth_year:strong_partisan_wave_1
## 4
## family_income:north_central
## 4
## family_income:strong_partisan_wave_1
## 4
## education:north_central
## 4
## education:south
## 4
## education:strong_partisan_wave_1
## 4
## ideo_homogeneity_offline:north_central
## 4
## ideo_homogeneity_offline:strong_partisan_wave_1
## 4
## north_central:south
## 4
## south:strong_partisan_wave_1
## 4
## south:freq_twitter_wave_1
## 4
## south:political_wave_1
## 4
## strong_partisan_wave_1:political_wave_1
## 4
## freq_twitter_wave_1:party_id_wave_1
## 4
## percent_co_party:education
## 6
## percent_co_party:north_central
## 6
## family_income:gender
## 6
## gender:south
## 6
## gender:political_wave_1
## 6
## substantive_ideology_scale_wave_1:gender
## 7
## family_income:education
## 7
## family_income:freq_twitter_wave_1
## 7
## education:northeast
## 7
## substantive_ideology_scale_wave_1.2
## 7
## substantive_ideology_scale_wave_1:south
## 8
## family_income:northeast
## 10
## strong_partisan_wave_1:party_id_wave_1
## 11
## strong_partisan_wave_1
## 12
## northeast:strong_partisan_wave_1
## 12
## substantive_ideology_scale_wave_1:family_income
## 14
## substantive_ideology_scale_wave_1:freq_twitter_wave_1
## 14
## percent_co_party
## 15
## percent_co_party:party_id_wave_1
## 15
## family_income:ideo_homogeneity_offline
## 15
## substantive_ideology_scale_wave_1:education
## 16
## percent_co_party:south
## 18
## birth_year:political_wave_1
## 19
## birth_year:northeast
## 21
## (Intercept)
## 22
## substantive_ideology_scale_wave_1
## 22
## political_wave_1
## 22
## substantive_ideology_scale_wave_1:party_id_wave_1
## 22
## percent_co_party:political_wave_1
## 22
## strong_partisan_wave_1:freq_twitter_wave_1
## 22
## party_id_wave_1:political_wave_1
## 22
#print(vars.x)
to_include_gop <- c(vars.x[-1]) # can add variables of importance here```
#Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
Two Stage Least Squares
## for 2SLS
df.2sls <- data.frame(cbind(df.x.mod,df$Y,df$W,df$perfect_complier,
df$half_complier,df$bot_followers,df$bin_maker))
colnames(df.2sls) <- c(colnames(df.x.mod),"Y","W","perfect_complier",
"half_complier","bot_followers","bin_maker")
# include outcome and instrument variables now
democrats <- df.2sls[dems,]
republicans <- df.2sls[gop,]
datasets <- list(democrats,republicans)
to_include <- list(to_include_dems,to_include_gop)
# re-create functions to calculate CACE with vars from LASSO
# use vars[-1] to remove intercept
CACE_int_fc<-function(data, to_include){ ## *consider manually inputting main effects*
results<-ivreg(paste("Y ~perfect_complier+",
paste(to_include, collapse = "+"), "+as.factor(bin_maker)| W +",
paste(to_include, collapse = "+"),"+as.factor(bin_maker)"),
data = data)
#calculate cluster robust standard errors
data$bin_maker<-as.numeric(data$bin_maker)
output<-cluster.robust.se(results, data$bin_maker)[2,]
return(output)}
CACE_int_hc<-function(data, to_include){ ## *consider manually inputting main effects*
results<-ivreg(paste("Y ~half_complier+",
paste(to_include, collapse = "+"),"+as.factor(bin_maker)| W +",
paste(to_include, collapse = "+"),"+as.factor(bin_maker)"),
data = data)
#calculate cluster robust standard errors
data$bin_maker<-as.numeric(data$bin_maker)
output<-cluster.robust.se(results, data$bin_maker)[2,]
return(output)}
CACE_int_bf<-function(data, to_include){ ## *consider manually inputting main effects*
results<-ivreg(paste("Y ~bot_followers+",
paste(to_include, collapse = "+"),"+as.factor(bin_maker)| W +",
paste(to_include, collapse = "+"),"+as.factor(bin_maker)"),
data = data)
#calculate cluster robust standard errors
data$bin_maker<-as.numeric(data$bin_maker)
output<-cluster.robust.se(results, data$bin_maker)[2,]
return(output)}
full_compliance_int_models <- mapply(CACE_int_fc,datasets,to_include)
## [1] "Cluster Robust Standard Errors"
## [1] "Cluster Robust Standard Errors"
half_compliance_int_models <- mapply(CACE_int_hc,datasets,to_include)
## [1] "Cluster Robust Standard Errors"
## [1] "Cluster Robust Standard Errors"
bot_follower_int_models <- mapply(CACE_int_bf,datasets,to_include)
## [1] "Cluster Robust Standard Errors"
## [1] "Cluster Robust Standard Errors"
CACE Results
republican_full_compliance_int_cace<-as.data.frame(t(full_compliance_int_models[,2]))
republican_full_compliance_int_cace$sample<-"republicans_full_compliance"
republican_full_compliance_int_cace$party<-"republicans"
names(republican_full_compliance_int_cace)<-c("estimate","se","t","p","sample","party")
republican_half_compliance_int_cace<-as.data.frame(t(half_compliance_int_models[,2]))
republican_half_compliance_int_cace$sample<-"republicans_half_compliance"
republican_half_compliance_int_cace$party<-"republicans"
names(republican_half_compliance_int_cace)<-c("estimate","se","t","p","sample","party")
republican_bot_follower_int_cace<-as.data.frame(t(bot_follower_int_models[,2]))
republican_bot_follower_int_cace$sample<-"republicans_bot_follower"
republican_bot_follower_int_cace$party<-"republicans"
names(republican_bot_follower_int_cace)<-c("estimate","se","t","p","sample","party")
#extract results for democrats
democrat_full_compliance_int_cace<-data.frame(t(full_compliance_int_models[,1]))
democrat_full_compliance_int_cace$sample<-"democrats_full_compliance"
democrat_full_compliance_int_cace$party<-"democrats"
names(democrat_full_compliance_int_cace)<-c("estimate","se","t","p","sample","party")
democrat_half_compliance_int_cace<-as.data.frame(t(half_compliance_int_models[,1]))
democrat_half_compliance_int_cace$sample<-"democrats_half_compliance"
democrat_half_compliance_int_cace$party<-"democrats"
names(democrat_half_compliance_int_cace)<-c("estimate","se","t","p","sample","party")
democrat_bot_follower_int_cace<-as.data.frame(t(bot_follower_int_models[,1]))
democrat_bot_follower_int_cace$sample<-"democrats_bot_follower"
democrat_bot_follower_int_cace$party<-"democrats"
names(democrat_bot_follower_int_cace)<-c("estimate","se","t","p","sample","party")
CACE Plot
#create another dataset that combines ITT and CACE results for plotting
republican_itt<-
data.frame(t(summary(republican_ITT_int_model, cluster="bin_maker")$coefficients[2:2,]))
names(republican_itt)<-c("estimate","se","t","p")
republican_itt$sample<-"republicans_itt"
republican_itt$party<-"republicans"
democrat_itt<-
data.frame(t(summary(democrat_ITT_int_model, cluster="bin_maker")$coefficients[2:2,]))
names(democrat_itt)<-c("estimate","se","t","p")
democrat_itt$sample<-"democrats_itt"
democrat_itt$party<-"democrats"
republican_int_plot<-rbind(republican_full_compliance_int_cace,
republican_half_compliance_int_cace,
republican_bot_follower_int_cace,
republican_itt)
democrat_int_plot<-rbind(democrat_full_compliance_int_cace,
democrat_half_compliance_int_cace,
democrat_bot_follower_int_cace,
democrat_itt)
republican_int_plot$sample<-factor(republican_int_plot$sample,
levels=c("republicans_full_compliance",
"republicans_half_compliance",
"republicans_bot_follower",
"republicans_itt"),
labels=c("Fully Compliant Respondents",
"Partially Compliant Respondents",
"Minimally Compliant Respondents",
"Respondents Assigned to Treatment"))
democrat_int_plot$sample<-factor(democrat_int_plot$sample,
levels=c("democrats_full_compliance",
"democrats_half_compliance",
"democrats_bot_follower",
"democrats_itt"),
labels=c("Fully Compliant Respondents",
"Partially Compliant Respondents",
"Minimally Compliant Respondents",
"Respondents Assigned to Treatment"))
#create standard error bars
interval1 <- -qnorm((1-0.9)/2) # 90% multiplier
interval2 <- -qnorm((1-0.95)/2) # 95% multiplier
#create plot
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
figure_2_dems<-ggplot(democrat_int_plot)+
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
geom_point(aes(x=sample, y=estimate),
position = position_dodge(width = 1/2),
size=2, colour="blue")+
geom_linerange(aes(x = sample, ymin = estimate - se*interval1,
ymax = estimate + se*interval1),
lwd = 1, position = position_dodge(width = 1/2),
colour="blue")+
geom_linerange(aes(x = sample, y = estimate, ymin = estimate - se*interval2,
ymax = estimate + se*interval2),
lwd = .5, position = position_dodge(width = 1/2),
colour="blue")+
theme(axis.text=element_text(size=9, face="bold",colour="black"),
plot.title = element_text(face="bold", size=16, hjust = 0.5,vjust=3),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background=element_blank(),
axis.title=element_text(size=9, colour="black"),
legend.position="none",
legend.key = element_blank(),
legend.title=element_blank())+
ylim(c(-1,1))+
labs(x="",y="")+
coord_flip()+
ggtitle(fig_title("Democrats"))
## Warning: Ignoring unknown aesthetics: y
figure_2_reps<-ggplot(republican_int_plot)+
geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
geom_point(aes(x=sample, y=estimate),
position = position_dodge(width = 1/2),
size=2, colour="red")+
geom_linerange(aes(x = sample, ymin = estimate - se*interval1,
ymax = estimate + se*interval1),
lwd = 1, position = position_dodge(width = 1/2),
colour="red")+
geom_linerange(aes(x = sample, y = estimate, ymin = estimate - se*interval2,
ymax = estimate + se*interval2),
lwd = .5, position = position_dodge(width = 1/2),
colour="red")+
theme(axis.text=element_text(size=9, face="bold",colour="black"),
plot.title = element_text(face="bold", size=16, hjust = 0.5, vjust=3),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background=element_blank(),
axis.title=element_text(size=9, colour="black"),
legend.position="none",
legend.key = element_blank(),
legend.title=element_blank())+
labs(x="",y="")+
ylim(c(-1,1))+
coord_flip()+
ggtitle(fig_title("Republicans"))
## Warning: Ignoring unknown aesthetics: y
figure_2_dems

figure_2_reps

ggsave(figure_2_dems, file="Figure 2 dems.png", width=7, height=4, dpi=1000)
ggsave(figure_2_reps, file="Figure 2 reps.png", width=7, height=4, dpi=1000)
5. AIPW
Augmented Inverse Propensity Weighting is used to estimate the treatment effects for “full compliers.” The Lasso model is used to estimate both propensities and outcomes, and it is assessed over a grid of Lambda values.
AIPW Setup
###############################################################
## AIPW on full compliance
###############################################################
# for each group, restrict sample to those who were treated
dems = c(df$party_id_wave_1 == 1 & df$W == 1)
gop = c(df$party_id_wave_1 == 2 & df$W == 1)
# will use democrats and republicans full compliance,
# even though we are getting into small n sizes here
full_comp = as.numeric(as.character(df$perfect_complier))
full_comp.dem = full_comp[dems]
full_comp.rep = full_comp[gop]
democrats <- df.x.mod[dems,]
republicans <- df.x.mod[gop,]
AIPW Democrats
#### democrats
# propensities
democrats.propensity = amlinear:::crossfit.cv.glmnet(
democrats, full_comp.dem,family = "binomial",keep = TRUE,
weights = df$weights[dems],
lambda = seq(0.001,0.1,by = 0.001))
theta.hat = amlinear:::crossfit.predict(democrats.propensity)
p_lasso = 1/(1 + exp(-theta.hat))
# inverse propensity score weighting
G <- ((full_comp.dem - p_lasso) * df$Y[dems]) /
(p_lasso * (1 - p_lasso))
tau.hat <- mean(G)
se.hat <- sqrt(var(G) / (length(G) - 1))
c(ATE=tau.hat, lower_ci = tau.hat - 1.96 * se.hat, upper_ci = tau.hat + 1.96 * se.hat)
## ATE lower_ci upper_ci
## -0.2750413 -0.9068217 0.3567391
# outcomes
Xmod.dem = cbind(full_comp.dem,democrats,
(2 * full_comp.dem - 1) * democrats)
democrats.outcome = amlinear:::crossfit.cv.glmnet(
Xmod.dem, df$Y[dems], keep = TRUE,
weights = df$weights[dems],
penalty.factor = c(0, rep(1, ncol(Xmod.dem) - 1)),
lambda = seq(0.001,0.1,by = 0.001)) # for simplicity, use same lambda grid
yhat.noncomp.dem = amlinear:::crossfit.predict(democrats.outcome,
cbind(0, democrats, -democrats))
yhat.comp.dem = amlinear:::crossfit.predict(democrats.outcome,
cbind(1, democrats, democrats))
# estimate effects
mean(yhat.comp.dem - yhat.noncomp.dem) # estimate ATE
## [1] 0.02783164
G = yhat.comp.dem - yhat.noncomp.dem + # augmented ipw
full_comp.dem / p_lasso *
(df$Y[dems] - yhat.comp.dem) -
(1 - full_comp.dem) / (1 - p_lasso) *
(df$Y[dems] - yhat.noncomp.dem)
tau.hat = mean(G)
se.hat = sqrt(var(G) / length(G))
tauhat_lasso_aipw = c(ATE=tau.hat,
lower_ci=tau.hat-1.96*se.hat,
upper_ci=tau.hat+1.96*se.hat)
tauhat_lasso_aipw
## ATE lower_ci upper_ci
## 0.05203412 -0.04894128 0.15300951
Lambda Grid Democrats
# examine predictions of ATE from grid of various lambda values
p_lasso_grid <- democrats.propensity$fit.preval
democrats.outcome.comp = amlinear:::crossfit.cv.glmnet(
Xmod.dem[full_comp.dem == 1,], df$Y[dems][full_comp.dem == 1], keep = TRUE,
weights = df$weights[dems][full_comp.dem == 1],
lambda = democrats.propensity$lambda) # for simplicity, use same lambda grid
democrats.outcome.noncomp = amlinear:::crossfit.cv.glmnet(
Xmod.dem[full_comp.dem == 0,], df$Y[dems][full_comp.dem == 0], keep = TRUE,
weights = df$weights[dems][full_comp.dem == 0],
lambda = democrats.propensity$lambda) # for simplicity, use same lambda grid
# cross fitted predictions on control and treated groups
cross_fit_comp = democrats.outcome.comp$fit.preval
cross_fit_noncomp = democrats.outcome.noncomp$fit.preval
# predictions on control group from treated model, and vice versa
democrats_predict_comp = predict.cv.glmnet(democrats.outcome.comp,
newx = Xmod.dem[full_comp.dem == 0,],
s = democrats.propensity$lambda)
democrats_predict_noncomp = predict.cv.glmnet(democrats.outcome.noncomp,
newx = Xmod.dem[full_comp.dem == 1,],
s = democrats.propensity$lambda)
# combine predictions
yhat.comp.dem_grid = rbind(cross_fit_noncomp,democrats_predict_noncomp)
yhat.noncomp.dem_grid = rbind(cross_fit_comp,democrats_predict_comp)
# estimate ATE grid
tauhat_lasso_ols_grid = apply(yhat.comp.dem_grid -
yhat.noncomp.dem_grid,2,mean)
library(ggplot2)
ggplot(mapping = aes(x = democrats.propensity$lambda, y = tauhat_lasso_ols_grid))+
geom_line() + xlab("Lambda") + ylab("Tau Hat") + ggtitle(fig_title("Lasso ATE by Lambda Values"))

G_grid = yhat.comp.dem_grid - yhat.noncomp.dem_grid + # augmented ipw
full_comp.dem / p_lasso_grid *
(df$Y[dems] - yhat.comp.dem_grid) -
(1 - full_comp.dem) / (1 - p_lasso_grid) *
(df$Y[dems] - yhat.noncomp.dem_grid)
tau.hat_grid = apply(G_grid, 2, mean)
se.hat_grid = apply(G_grid, 2, function(x) sqrt(var(x) / length(x)))
tauhat_lasso_aipw_grid = rbind(ATE=tau.hat_grid,
lower_ci=tau.hat_grid-1.96*se.hat_grid,
upper_ci=tau.hat_grid+1.96*se.hat_grid)
tauhat_lasso_aipw_grid
## 1 2 3 4 5
## ATE 0.19367713 0.19395150 0.19421819 0.19460987 0.19506864
## lower_ci -0.05105801 -0.05110167 -0.05118444 -0.05111431 -0.05097443
## upper_ci 0.43841228 0.43900468 0.43962082 0.44033405 0.44111170
## 6 7 8 9 10
## ATE 0.19563072 0.19618703 0.19675711 0.19730368 0.19786752
## lower_ci -0.05075151 -0.05053896 -0.05031869 -0.05011824 -0.04990541
## upper_ci 0.44201295 0.44291303 0.44383290 0.44472560 0.44564045
## 11 12 13 14 15
## ATE 0.19844284 0.19903767 0.1999154 0.20091367 0.20191583
## lower_ci -0.04968742 -0.04945636 -0.0489934 -0.04843362 -0.04788074
## upper_ci 0.44657311 0.44753170 0.4488242 0.45026095 0.45171239
## 16 17 18 19 20
## ATE 0.20288151 0.20383829 0.20466211 0.20537598 0.20609732
## lower_ci -0.04736563 -0.04687692 -0.04658316 -0.04638031 -0.04618064
## upper_ci 0.45312864 0.45455351 0.45590737 0.45713228 0.45837528
## 21 22 23 24 25
## ATE 0.20694895 0.20786119 0.20879005 0.20972200 0.21061607
## lower_ci -0.04597628 -0.04578224 -0.04558689 -0.04539792 -0.04525014
## upper_ci 0.45987418 0.46150461 0.46316699 0.46484191 0.46648228
## 26 27 28 29 30
## ATE 0.21151832 0.21237840 0.21311325 0.21387620 0.21462946
## lower_ci -0.04508731 -0.04494833 -0.04500909 -0.04509582 -0.04520156
## upper_ci 0.46812395 0.46970514 0.47123560 0.47284821 0.47446048
## 31 32 33 34 35
## ATE 0.2155296 0.21676673 0.21800648 0.21943238 0.22105809
## lower_ci -0.0452600 -0.04511213 -0.04501159 -0.04482438 -0.04452538
## upper_ci 0.4763193 0.47864558 0.48102456 0.48368914 0.48664155
## 36 37 38 39 40
## ATE 0.22247762 0.22374665 0.22504710 0.22652852 0.22818528
## lower_ci -0.04435583 -0.04430629 -0.04426453 -0.04401078 -0.04350843
## upper_ci 0.48931106 0.49179959 0.49435873 0.49706782 0.49987900
## 41 42 43 44 45
## ATE 0.22983766 0.23144337 0.23298855 0.23472614 0.23640501
## lower_ci -0.04304662 -0.04269243 -0.04250429 -0.04221522 -0.04192671
## upper_ci 0.50272195 0.50557917 0.50848140 0.51166750 0.51473673
## 46 47 48 49 50
## ATE 0.23779015 0.23924243 0.24079499 0.24207248 0.24261448
## lower_ci -0.04184565 -0.04177272 -0.04164623 -0.04183592 -0.04257558
## upper_ci 0.51742595 0.52025757 0.52323621 0.52598088 0.52780453
## 51 52 53 54 55
## ATE 0.24317938 0.24398148 0.2449908 0.24592678 0.24664199
## lower_ci -0.04345971 -0.04417657 -0.0448155 -0.04590539 -0.04728063
## upper_ci 0.52981847 0.53213954 0.5347971 0.53775894 0.54056461
## 56 57 58 59 60
## ATE 0.24686495 0.24719824 0.24683544 0.24640924 0.24614443
## lower_ci -0.04914303 -0.05089211 -0.05329506 -0.05578224 -0.05829716
## upper_ci 0.54287293 0.54528859 0.54696594 0.54860071 0.55058601
## 61 62 63 64 65
## ATE 0.24627024 0.24591154 0.24531380 0.24487014 0.24463672
## lower_ci -0.06033446 -0.06309194 -0.06629761 -0.06944154 -0.07230459
## upper_ci 0.55287494 0.55491502 0.55692522 0.55918182 0.56157804
## 66 67 68 69 70
## ATE 0.24480455 0.24553550 0.24795189 0.25063342 0.25389480
## lower_ci -0.07539874 -0.07851833 -0.08063727 -0.08312465 -0.08591152
## upper_ci 0.56500784 0.56958934 0.57654105 0.58439149 0.59370112
## 71 72 73 74 75
## ATE 0.25816412 0.26247360 0.26581254 0.2683005 0.2684164
## lower_ci -0.08860777 -0.09185586 -0.09540904 -0.1004282 -0.1077531
## upper_ci 0.60493602 0.61680306 0.62703412 0.6370291 0.6445858
## 76 77 78 79 80 81
## ATE 0.2675123 0.2656679 0.2642859 0.2629072 0.2562281 0.2488556
## lower_ci -0.1165072 -0.1258329 -0.1353641 -0.1452951 -0.1586326 -0.1735568
## upper_ci 0.6515318 0.6571688 0.6639359 0.6711094 0.6710887 0.6712680
## 82 83 84 85 86 87
## ATE 0.2446518 0.2419016 0.2374322 0.2362481 0.2337689 0.2313140
## lower_ci -0.1860716 -0.1963470 -0.2094798 -0.2216419 -0.2378523 -0.2586355
## upper_ci 0.6753752 0.6801503 0.6843441 0.6941380 0.7053901 0.7212635
## 88 89 90 91 92 93
## ATE 0.2330857 0.2386550 0.2470183 0.2490893 0.2454772 0.2375329
## lower_ci -0.2822273 -0.3085899 -0.3420567 -0.3965019 -0.4885655 -0.6461573
## upper_ci 0.7483987 0.7858999 0.8360934 0.8946805 0.9795200 1.1212231
## 94 95 96 97 98 99
## ATE 0.2288651 0.1375694 0.05243345 -1.526320 -14.12960 -163.0843
## lower_ci -0.9832778 -1.8052919 -3.55881233 -11.113143 -56.94303 -553.9782
## upper_ci 1.4410081 2.0804306 3.66367922 8.060502 28.68382 227.8096
## 100
## ATE -847.0080
## lower_ci -2404.0935
## upper_ci 710.0775
## plot tauhat grid results
ggplot()+
geom_line(mapping = aes(x = democrats.propensity$lambda, y = tauhat_lasso_aipw_grid[1,]))+
geom_line(mapping = aes(x = democrats.propensity$lambda, y = tauhat_lasso_aipw_grid[2,]),color = "green")+
geom_line(mapping = aes(x = democrats.propensity$lambda, y = tauhat_lasso_aipw_grid[3,]),color = "green")+
xlab("Lambda") + ylab("Tau Hat") + ylim(c(-10,10)) + ggtitle(fig_title("Estimated AIPW Treatment Effect by Lambda"))
## Warning: Removed 3 rows containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_path).
## Warning: Removed 3 rows containing missing values (geom_path).

AIPW Republicans
#### republicans
# propensities
republicans.propensity = amlinear:::crossfit.cv.glmnet(
republicans, full_comp.rep,family = "binomial",keep = TRUE,
weights = df$weights[gop],
lambda = seq(0.001,0.1,by = 0.001))
theta.hat = amlinear:::crossfit.predict(republicans.propensity)
p_lasso = 1/(1 + exp(-theta.hat))
# inverse propensity score weighting
G <- ((full_comp.rep - p_lasso) * df$Y[gop]) /
(p_lasso * (1 - p_lasso))
tau.hat <- mean(G)
se.hat <- sqrt(var(G) / (length(G) - 1))
c(ATE=tau.hat, lower_ci = tau.hat - 1.96 * se.hat, upper_ci = tau.hat + 1.96 * se.hat)
## ATE lower_ci upper_ci
## -0.5733562 -1.9757520 0.8290395
# outcomes
Xmod.rep = cbind(full_comp.rep,republicans,
(2 * full_comp.rep - 1) * republicans)
republicans.outcome = amlinear:::crossfit.cv.glmnet(
Xmod.rep, df$Y[gop], keep = TRUE,
weights = df$weights[gop],
penalty.factor = c(0, rep(1, ncol(Xmod.rep) - 1)),
lambda = seq(0.001,0.1,by = 0.001)) # for simplicity, use same lambda grid
yhat.noncomp.rep = amlinear:::crossfit.predict(republicans.outcome,
cbind(0, republicans, -republicans))
yhat.comp.rep = amlinear:::crossfit.predict(republicans.outcome,
cbind(1, republicans, republicans))
# estimate effects
mean(yhat.comp.rep - yhat.noncomp.rep) # estimate ATE
## [1] -0.1405493
G = yhat.comp.rep - yhat.noncomp.rep + # augmented ipw
full_comp.rep / p_lasso *
(df$Y[gop] - yhat.comp.rep) -
(1 - full_comp.rep) / (1 - p_lasso) *
(df$Y[gop] - yhat.noncomp.rep)
tau.hat = mean(G)
se.hat = sqrt(var(G) / length(G))
tauhat_lasso_aipw = c(ATE=tau.hat,
lower_ci=tau.hat-1.96*se.hat,
upper_ci=tau.hat+1.96*se.hat)
tauhat_lasso_aipw
## ATE lower_ci upper_ci
## -0.18217149 -0.31846921 -0.04587376
Lambda Grid Republicans
# examine predictions of ATE from grid of various lambda values
p_lasso_grid <- republicans.propensity$fit.preval
republicans.outcome.comp = amlinear:::crossfit.cv.glmnet(
Xmod.rep[full_comp.rep == 1,], df$Y[gop][full_comp.rep == 1], keep = TRUE,
weights = df$weights[gop][full_comp.rep == 1],
lambda = republicans.propensity$lambda) # for simplicity, use same lambda grid
republicans.outcome.noncomp = amlinear:::crossfit.cv.glmnet(
Xmod.rep[full_comp.rep == 0,], df$Y[gop][full_comp.rep == 0], keep = TRUE,
weights = df$weights[gop][full_comp.rep == 0],
lambda = republicans.propensity$lambda) # for simplicity, use same lambda grid
# cross fitted predictions on control and treated groups
cross_fit_comp = republicans.outcome.comp$fit.preval
cross_fit_noncomp = republicans.outcome.noncomp$fit.preval
# predictions on control group from treated model, and vice versa
republicans_predict_comp = predict.cv.glmnet(republicans.outcome.comp,
newx = Xmod.rep[full_comp.rep == 0,],
s = republicans.propensity$lambda)
republicans_predict_noncomp = predict.cv.glmnet(republicans.outcome.noncomp,
newx = Xmod.rep[full_comp.rep == 1,],
s = republicans.propensity$lambda)
# combine predictions
yhat.comp.rep_grid = rbind(cross_fit_noncomp,republicans_predict_noncomp)
yhat.noncomp.rep_grid = rbind(cross_fit_comp,republicans_predict_comp)
# estimate ATE grid
tauhat_lasso_ols_grid = apply(yhat.comp.rep_grid -
yhat.noncomp.rep_grid,2,mean)
library(ggplot2)
ggplot(mapping = aes(x = republicans.propensity$lambda, y = tauhat_lasso_ols_grid))+
geom_line() + xlab("Lambda") + ylab("Tau Hat") + ggtitle("Lasso ATE by Lambda Values")

G_grid = yhat.comp.rep_grid - yhat.noncomp.rep_grid + # augmented ipw
full_comp.rep / p_lasso_grid *
(df$Y[gop] - yhat.comp.rep_grid) -
(1 - full_comp.rep) / (1 - p_lasso_grid) *
(df$Y[gop] - yhat.noncomp.rep_grid)
tau.hat_grid = apply(G_grid, 2, mean)
se.hat_grid = apply(G_grid, 2, function(x) sqrt(var(x) / length(x)))
tauhat_lasso_aipw_grid = rbind(ATE=tau.hat_grid,
lower_ci=tau.hat_grid-1.96*se.hat_grid,
upper_ci=tau.hat_grid+1.96*se.hat_grid)
tauhat_lasso_aipw_grid
## 1 2 3 4 5
## ATE -0.33954121 -0.33851687 -0.33773414 -0.33711553 -0.33640625
## lower_ci -0.66077158 -0.65953990 -0.65863234 -0.65797704 -0.65707916
## upper_ci -0.01831083 -0.01749383 -0.01683594 -0.01625402 -0.01573334
## 6 7 8 9 10
## ATE -0.33560709 -0.33481155 -0.33403872 -0.3332809 -0.3324549
## lower_ci -0.65607726 -0.65509834 -0.65415588 -0.6532482 -0.6522173
## upper_ci -0.01513692 -0.01452476 -0.01392156 -0.0133135 -0.0126924
## 11 12 13 14 15
## ATE -0.33165371 -0.33093481 -0.33021787 -0.32955322 -0.32888446
## lower_ci -0.65117208 -0.65021969 -0.64928929 -0.64842588 -0.64757397
## upper_ci -0.01213534 -0.01164992 -0.01114646 -0.01068056 -0.01019494
## 16 17 18 19 20
## ATE -0.328185723 -0.327514700 -0.326856866 -0.326200080 -0.325618306
## lower_ci -0.646707808 -0.645874653 -0.645083091 -0.644312459 -0.643562428
## upper_ci -0.009663638 -0.009154748 -0.008630642 -0.008087702 -0.007674184
## 21 22 23 24 25
## ATE -0.325071400 -0.324652201 -0.324560467 -0.324615571 -0.324669943
## lower_ci -0.642876162 -0.642309217 -0.641983299 -0.641844084 -0.641724242
## upper_ci -0.007266638 -0.006995184 -0.007137636 -0.007387058 -0.007615644
## 26 27 28 29 30
## ATE -0.324713378 -0.324768886 -0.324959381 -0.325228124 -0.324594299
## lower_ci -0.641619811 -0.641548818 -0.641649515 -0.641848325 -0.640631425
## upper_ci -0.007806944 -0.007988954 -0.008269246 -0.008607924 -0.008557173
## 31 32 33 34 35
## ATE -0.323615151 -0.322684949 -0.321764912 -0.320861350 -0.319979934
## lower_ci -0.638880746 -0.637211566 -0.635585525 -0.634033547 -0.632557210
## upper_ci -0.008349556 -0.008158332 -0.007944299 -0.007689154 -0.007402658
## 36 37 38 39 40
## ATE -0.319100158 -0.318593969 -0.317887167 -0.317302738 -0.316772571
## lower_ci -0.631143641 -0.630380772 -0.629643612 -0.629135602 -0.628764697
## upper_ci -0.007056675 -0.006807166 -0.006130723 -0.005469874 -0.004780446
## 41 42 43 44
## ATE -0.316141944 -0.315433953 -0.314749940 -0.3140916933
## lower_ci -0.628343509 -0.627907661 -0.627546683 -0.6272631401
## upper_ci -0.003940378 -0.002960246 -0.001953196 -0.0009202465
## 45 46 47 48 49
## ATE -0.3134493682 -0.312777640 -0.312146839 -0.311529460 -0.310880702
## lower_ci -0.6270755076 -0.626979254 -0.627011303 -0.627221031 -0.627664677
## upper_ci 0.0001767713 0.001423973 0.002717624 0.004162112 0.005903273
## 50 51 52 53 54
## ATE -0.310332191 -0.309930952 -0.30974756 -0.30986511 -0.31006456
## lower_ci -0.628334660 -0.629181059 -0.63028686 -0.63181540 -0.63335083
## upper_ci 0.007670277 0.009319154 0.01079174 0.01208518 0.01322172
## 55 56 57 58 59
## ATE -0.31078859 -0.31242702 -0.31425791 -0.31622435 -0.31848901
## lower_ci -0.63555080 -0.63915837 -0.64306919 -0.64741912 -0.65229780
## upper_ci 0.01397362 0.01430433 0.01455337 0.01497043 0.01531978
## 60 61 62 63 64
## ATE -0.32138685 -0.3248731 -0.32846868 -0.33331483 -0.33838673
## lower_ci -0.65806207 -0.6650527 -0.67222544 -0.68131264 -0.69074373
## upper_ci 0.01528838 0.0153064 0.01528808 0.01468298 0.01397026
## 65 66 67 68 69
## ATE -0.34364265 -0.34909870 -0.35390951 -0.35923521 -0.36599022
## lower_ci -0.70104458 -0.71181916 -0.72219540 -0.73347575 -0.74738620
## upper_ci 0.01375929 0.01362176 0.01437638 0.01500532 0.01540577
## 70 71 72 73 74
## ATE -0.37295698 -0.38099899 -0.39039816 -0.4005447 -0.41021360
## lower_ci -0.76175908 -0.77785995 -0.79630340 -0.8162773 -0.83599710
## upper_ci 0.01584512 0.01586197 0.01550708 0.0151879 0.01556991
## 75 76 77 78 79
## ATE -0.42031606 -0.43018489 -0.43736099 -0.4366269 -0.43204145
## lower_ci -0.85708406 -0.87896349 -0.89822475 -0.9071859 -0.91068350
## upper_ci 0.01645193 0.01859371 0.02350277 0.0339320 0.04660059
## 80 81 82 83 84
## ATE -0.42727267 -0.41310736 -0.39515304 -0.3771607 -0.3625413
## lower_ci -0.91503045 -0.90269057 -0.88695326 -0.8738357 -0.8688839
## upper_ci 0.06048512 0.07647585 0.09664718 0.1195143 0.1438012
## 85 86 87 88 89 90
## ATE -0.3502964 -0.3358345 -0.3241847 -0.3070115 -0.2859353 -0.268623
## lower_ci -0.8722131 -0.8756114 -0.8862905 -0.8972869 -0.9246608 -0.972235
## upper_ci 0.1716203 0.2039425 0.2379211 0.2832639 0.3527902 0.434989
## 91 92 93 94 95 96
## ATE -0.2578307 -0.2590486 -0.2644901 -0.4660162 -0.8627474 -1.508330
## lower_ci -1.0535907 -1.1764228 -1.3583567 -1.6263905 -2.1610131 -3.241440
## upper_ci 0.5379293 0.6583256 0.8293766 0.6943581 0.4355184 0.224781
## 97 98 99 100
## ATE -2.5575206 -10.054817 -53.91021 538.4492
## lower_ci -5.4244788 -25.299565 -168.26835 -896.1135
## upper_ci 0.3094375 5.189931 60.44794 1973.0119
## plot tauhat grid results
ggplot()+
geom_line(mapping = aes(x = republicans.propensity$lambda, y = tauhat_lasso_aipw_grid[1,]))+
geom_line(mapping = aes(x = republicans.propensity$lambda, y = tauhat_lasso_aipw_grid[2,]),color = "green")+
geom_line(mapping = aes(x = republicans.propensity$lambda, y = tauhat_lasso_aipw_grid[3,]),color = "green")+
xlab("Lambda") + ylab("Tau Hat") + ylim(c(-10,10)) + ggtitle(fig_title("Estimated AIPW Treatment Effect by Lambda"))
## Warning: Removed 3 rows containing missing values (geom_path).
## Warning: Removed 3 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).

Descriptive statistics
#Creating table of descriptive statistics of covariates
#Subset of numeric variables
nums <- unlist(lapply(final_data, is.numeric))
summ_stats <- fBasics::basicStats(final_data[ , nums])
summ_stats <- as.data.frame(t(summ_stats))
summ_stats <- summ_stats[c("Mean", "Stdev", "Minimum", "1. Quartile", "Median", "3. Quartile", "Maximum")]
colnames(summ_stats)[colnames(summ_stats) %in% c('1. Quartile', '3. Quartile')] <- c('Lower quartile', 'Upper quartile')
#Table
options(kableExtra.latex.load_packages = FALSE)
#options(knitr.table.format = "latex")
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
summ_stats_table <- kable(summ_stats, "html", digits = 2,
caption = tab_title("Descriptive Statistics"))
kable_styling(summ_stats_table,
bootstrap_options=c("striped", "hover", "condensed", "responsive"),
full_width=FALSE)
Table 3: Descriptive Statistics
|
|
Mean
|
Stdev
|
Minimum
|
Lower quartile
|
Median
|
Upper quartile
|
Maximum
|
|
treat
|
0.60
|
0.49
|
0.00
|
0.00
|
1.00
|
1.00
|
1.00
|
|
perfect_complier
|
0.10
|
0.30
|
0.00
|
0.00
|
0.00
|
0.00
|
1.00
|
|
half_complier
|
0.27
|
0.44
|
0.00
|
0.00
|
0.00
|
1.00
|
1.00
|
|
bot_followers
|
0.36
|
0.48
|
0.00
|
0.00
|
0.00
|
1.00
|
1.00
|
|
party_id_wave_1
|
1.43
|
0.50
|
1.00
|
1.00
|
1.00
|
2.00
|
2.00
|
|
strong_partisan_wave_1
|
0.57
|
0.50
|
0.00
|
0.00
|
1.00
|
1.00
|
1.00
|
|
substantive_ideology_scale_wave_5
|
3.37
|
1.52
|
1.00
|
2.00
|
3.22
|
4.60
|
7.00
|
|
substantive_ideology_scale_wave_1
|
3.39
|
1.48
|
1.00
|
2.10
|
3.30
|
4.50
|
7.00
|
|
percent_co_party
|
0.47
|
0.28
|
0.00
|
0.21
|
0.63
|
0.69
|
0.69
|
|
friends_count_wave_1
|
6.77
|
0.20
|
6.73
|
6.73
|
6.73
|
6.73
|
8.33
|
|
birth_year
|
1965.55
|
15.39
|
1933.00
|
1954.00
|
1964.00
|
1974.00
|
1995.00
|
|
family_income
|
7.30
|
3.44
|
1.00
|
5.00
|
7.00
|
10.00
|
16.00
|
|
education
|
4.56
|
1.29
|
1.00
|
3.00
|
5.00
|
6.00
|
6.00
|
|
gender
|
1.52
|
0.50
|
1.00
|
1.00
|
2.00
|
2.00
|
2.00
|
|
ideo_homogeneity_offline
|
62.79
|
21.53
|
0.00
|
50.00
|
60.00
|
80.00
|
100.00
|
|
northeast
|
0.20
|
0.40
|
0.00
|
0.00
|
0.00
|
0.00
|
1.00
|
|
north_central
|
0.20
|
0.40
|
0.00
|
0.00
|
0.00
|
0.00
|
1.00
|
|
south
|
0.39
|
0.49
|
0.00
|
0.00
|
0.00
|
1.00
|
1.00
|
|
west
|
0.21
|
0.41
|
0.00
|
0.00
|
0.00
|
0.00
|
1.00
|
6. Causal Tree
The Causal Tree is a decision-tree estimator that can be used to assess heterogeneity in treatment effects. Because the single tree is modeled using random samples of covariates, the results are sensitive to the seed in the R software.
Data Split
##Split the dataset
#We'll start by separating a portion of the dataset as a test set.
train_fraction <- 0.80
n <- dim(df)[1]
train_idx <- sample.int(n, replace=F, size=floor(n*train_fraction))
data_train <- df[train_idx,]
data_test <- df[-train_idx,]
#Create further subsets of the data into splitting, estimation, and validation samples
split_size <- floor(nrow(data_train) * 0.5)
split_idx <- sample(nrow(data_train), replace=FALSE, size=split_size)
data_split <- data_train[split_idx,]
data_est <- data_train[-split_idx,]
Fitting the tree, cross-validation, and point estimates
seed <- 1 # start seed at 1, find seed that splits data
num_leaves <- 1 # arbitrary, so that while loop works
while(num_leaves == 1){
##Fit the tree
set.seed(seed)
library(causalTree)
#Defining a formula containing outcome and covariates
fmla_ct <- paste(" Y ~", paste(covariates, collapse = " + "))
#CT
ct_unpruned <- honest.causalTree(
formula=fmla_ct,
data=data_split,
est_data=data_est,
treatment=data_split$W,
est_treatment=data_est$W,
split.Rule="CT",
cv.option="TOT",
split.Honest=TRUE,
cv.Honest=TRUE,
minsize=15,
HonestSampleSize=nrow(data_est)
)
##Cross Validation
ct_cptable <- as.data.frame(ct_unpruned$cptable)
#Optimal complexity parameter to prune tree
selected_cp <- which.min(ct_cptable$xerror)
optim_cp_ct <- ct_cptable[selected_cp, "CP"]
# Prune the tree at optimal complexity parameter.
ct_pruned <- prune(tree=ct_unpruned, cp=optim_cp_ct)
##Point estimates on estimation sample
tauhat_ct_est <- predict(ct_pruned, newdata=data_est)
##Standard errors
#Create a factor column indicating leaf assignment
num_leaves <- length(unique(tauhat_ct_est))
data_est$leaf <- factor(tauhat_ct_est, labels = seq(num_leaves))
#print(num_leaves)
seed <- seed + 1
}
## Loading required package: rpart
##
## Attaching package: 'rpart'
## The following object is masked from 'package:survival':
##
## solder
## Loading required package: rpart.plot
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
## [1] 2
## [1] "CT"
Point estimates and standard errors
# Run the regression
ols_ct <- lm(as.formula("Y ~ 0 + leaf + W:leaf"), data=data_est)
#Table of estimates and standard errors by leaf
ols_ct_summary <- summary(ols_ct)
te_summary <- coef(ols_ct_summary)[(num_leaves+1):(2*num_leaves), c("Estimate", "Std. Error")]
print(te_summary)
## Estimate Std. Error
## leaf1:W -0.45243056 0.4181464
## leaf2:W -0.03359684 0.5006359
## leaf3:W 0.20366558 0.4145783
## leaf4:W 0.27407092 0.3380571
## leaf5:W 0.37760727 0.3687031
## leaf6:W 0.48298128 0.3659228
## leaf7:W 0.52777778 0.3831249
## leaf8:W 0.72357143 0.3998245
##Point estimates on test sample
tauhat_ct_test <- predict(ct_pruned, newdata=data_test)
Plotting tree
#Plot pruned tree
rpart.plot(
x=ct_pruned, # Pruned tree
type=3,
fallen=TRUE,
leaf.round=1,
extra=100,
branch=.1,
box.palette="RdBu")
title(main = fig_title("Causal Tree"))

Test heterogeneity across leaves
# Null hypothesis: all leaf values are the same
hypothesis <- paste0("leaf1:W = leaf", seq(2, num_leaves), ":W")
ftest <- linearHypothesis(ols_ct, hypothesis, test="F")
#Table
kable_styling(kable(data.frame(ftest, check.names = FALSE, row.names = NULL)[2,],
"html", digits = 4,
caption=tab_title("Testing null hypothesis:<br> Average treatment effect is same across leaves"), booktabs = TRUE),
bootstrap_options=c("striped", "hover", "condensed", "responsive"),
full_width=FALSE)
Table 4: Testing null hypothesis:
Average treatment effect is same across leaves
|
|
Res.Df
|
RSS
|
Df
|
Sum of Sq
|
F
|
Pr(>F)
|
|
2
|
397
|
740.4164
|
7
|
10.2772
|
0.7872
|
0.5983
|
Covariate heterogeneity
# Null hypothesis: the mean is equal across all leaves
hypothesis <- paste0("leaf1 = leaf", seq(2, num_leaves))
means_per_leaf <- matrix(nrow = num_leaves, ncol = num_leaves)
significance <- matrix(nrow = 2, ncol=length(covariates))
# Regress each covariate on leaf assignment to means p
cov_means <- lapply(covariates, function(covariate) {
lm(paste0(covariate, ' ~ 0 + leaf'), data = data_est)
})
# Extract the mean and standard deviation of each covariate per leaf
cov_table <- lapply(cov_means, function(cov_mean) {
as.data.frame(t(coef(summary(cov_mean))[,c("Estimate", "Std. Error")]))
})
# Test if means are the same across leaves
cov_ftests <- sapply(cov_means, function(cov_mean) {
# Sometimes the regression has no residual (SSE = 0),
# so we cannot perform an F-test
tryCatch({
linearHypothesis(cov_mean, hypothesis)[2, c("F", "Pr(>F)")]
},
error = function(cond) {
message(paste0("Error message during F-test for`", cov_mean$terms[[2]], "`:"))
message(cond)
return(c("F" = NA, "Pr(>F)" = NA))
})
})
Table 5: Average covariate values in each leaf
|
covariates
|
leaf1
|
leaf2
|
leaf3
|
leaf4
|
leaf5
|
leaf6
|
leaf7
|
leaf8
|
|
percent_co_party
|
-0.302
|
0.638
|
-1.392
|
-1.28
|
0.719
|
0.703
|
0.638
|
0.685
|
|
|
(0.076)
|
(0.09)
|
(0.076)
|
(0.062)
|
(0.069)
|
(0.07)
|
(0.073)
|
(0.076)
|
|
ideo_homogeneity_offline
|
-0.05
|
-0.256
|
-0.89
|
0.552
|
-0.201
|
0.385
|
-1.022
|
1.39
|
|
|
(0.096)
|
(0.114)
|
(0.097)
|
(0.078)
|
(0.088)
|
(0.088)
|
(0.093)
|
(0.096)
|
|
friends_count_wave_1
|
-0.067
|
0.142
|
-0.182
|
-0.182
|
0.108
|
-0.083
|
0.034
|
-0.067
|
|
|
(0.122)
|
(0.145)
|
(0.124)
|
(0.1)
|
(0.112)
|
(0.113)
|
(0.118)
|
(0.122)
|
|
education
|
-0.273
|
-0.115
|
-0.418
|
-0.046
|
0.37
|
-0.199
|
-0.039
|
0.391
|
|
|
(0.143)
|
(0.17)
|
(0.145)
|
(0.117)
|
(0.132)
|
(0.133)
|
(0.139)
|
(0.143)
|
|
family_income
|
0.112
|
-0.285
|
-0.131
|
-0.06
|
0.172
|
-0.109
|
0.112
|
0.124
|
|
|
(0.143)
|
(0.17)
|
(0.145)
|
(0.117)
|
(0.132)
|
(0.133)
|
(0.139)
|
(0.143)
|
|
birth_year
|
0.151
|
0.099
|
0.285
|
-0.053
|
-0.47
|
-0.013
|
-0.162
|
-0.333
|
|
|
(0.136)
|
(0.161)
|
(0.137)
|
(0.111)
|
(0.125)
|
(0.126)
|
(0.132)
|
(0.136)
|
|
endtime_wave_5
|
2.204
|
-0.565
|
-0.296
|
-0.25
|
-0.574
|
-0.073
|
0.04
|
-0.316
|
|
|
(0.09)
|
(0.107)
|
(0.091)
|
(0.074)
|
(0.083)
|
(0.083)
|
(0.087)
|
(0.09)
|
|
substantive_ideology_scale_wave_1
|
0.1
|
0.278
|
0.246
|
0.508
|
-0.529
|
-0.184
|
-0.47
|
-0.501
|
|
|
(0.131)
|
(0.156)
|
(0.133)
|
(0.107)
|
(0.12)
|
(0.122)
|
(0.127)
|
(0.131)
|
|
gender
|
1.583
|
1.412
|
1.468
|
1.528
|
1.544
|
1.464
|
1.569
|
1.562
|
|
|
(0.072)
|
(0.086)
|
(0.073)
|
(0.059)
|
(0.066)
|
(0.067)
|
(0.07)
|
(0.072)
|
|
northeast
|
0.25
|
0.147
|
0.234
|
0.236
|
0.281
|
0.089
|
0.255
|
0.25
|
|
|
(0.06)
|
(0.071)
|
(0.06)
|
(0.049)
|
(0.055)
|
(0.055)
|
(0.058)
|
(0.06)
|
|
north_central
|
0.208
|
0.265
|
0.17
|
0.25
|
0.21
|
0.214
|
0.235
|
0.167
|
|
|
(0.06)
|
(0.071)
|
(0.06)
|
(0.049)
|
(0.055)
|
(0.055)
|
(0.058)
|
(0.06)
|
|
south
|
0.375
|
0.441
|
0.362
|
0.417
|
0.281
|
0.411
|
0.314
|
0.271
|
|
|
(0.069)
|
(0.082)
|
(0.07)
|
(0.057)
|
(0.064)
|
(0.064)
|
(0.067)
|
(0.069)
|
|
west
|
0.167
|
0.147
|
0.234
|
0.139
|
0.228
|
0.286
|
0.196
|
0.312
|
|
|
(0.059)
|
(0.07)
|
(0.06)
|
(0.048)
|
(0.054)
|
(0.055)
|
(0.057)
|
(0.059)
|
|
party_id_wave_1
|
1.562
|
1.324
|
1.681
|
1.778
|
1.158
|
1.25
|
1.196
|
1.146
|
|
|
(0.062)
|
(0.073)
|
(0.062)
|
(0.05)
|
(0.057)
|
(0.057)
|
(0.06)
|
(0.062)
|
|
political_wave_1
|
1.396
|
1.324
|
1.532
|
1.361
|
1.018
|
1.125
|
1.137
|
1.104
|
|
|
(0.058)
|
(0.069)
|
(0.058)
|
(0.047)
|
(0.053)
|
(0.053)
|
(0.056)
|
(0.058)
|
|
strong_partisan_wave_1
|
0.562
|
0.147
|
0.447
|
0.528
|
1
|
0.679
|
0.608
|
0.75
|
|
|
(0.064)
|
(0.076)
|
(0.064)
|
(0.052)
|
(0.058)
|
(0.059)
|
(0.062)
|
(0.064)
|
|
freq_twitter_wave_1
|
1.542
|
1.471
|
1.447
|
1.556
|
1.298
|
1.268
|
1.314
|
1.396
|
|
|
(0.07)
|
(0.083)
|
(0.071)
|
(0.057)
|
(0.064)
|
(0.065)
|
(0.068)
|
(0.07)
|
|
bot_followers
|
0.333
|
0.471
|
0.34
|
0.389
|
0.421
|
0.286
|
0.333
|
0.417
|
|
|
(0.07)
|
(0.083)
|
(0.071)
|
(0.057)
|
(0.064)
|
(0.065)
|
(0.068)
|
(0.07)
|
|
half_complier
|
0.188
|
0.353
|
0.192
|
0.208
|
0.333
|
0.232
|
0.196
|
0.292
|
|
|
(0.062)
|
(0.074)
|
(0.063)
|
(0.051)
|
(0.057)
|
(0.057)
|
(0.06)
|
(0.062)
|
|
perfect_complier
|
0.042
|
0.147
|
0.192
|
0.167
|
0.088
|
0.107
|
0.078
|
0.083
|
|
|
(0.046)
|
(0.054)
|
(0.046)
|
(0.037)
|
(0.042)
|
(0.042)
|
(0.044)
|
(0.046)
|
|
bin_maker
|
8.083
|
10.24
|
9.66
|
9.167
|
1.965
|
5.089
|
5.51
|
4.187
|
|
|
(0.598)
|
(0.711)
|
(0.604)
|
(0.488)
|
(0.549)
|
(0.554)
|
(0.58)
|
(0.598)
|
|
caseid
|
703.6
|
856.5
|
796.8
|
770.3
|
232.3
|
469.9
|
503.9
|
406.5
|
|
|
(43.51)
|
(51.7)
|
(43.97)
|
(35.53)
|
(39.93)
|
(40.28)
|
(42.21)
|
(43.51)
|
Table 6: Covariate variation across leaves
|
percent_co_party
|
0.7451
|
|
endtime_wave_5
|
0.6350
|
|
ideo_homogeneity_offline
|
0.5500
|
|
caseid
|
0.3210
|
|
bin_maker
|
0.3118
|
|
party_id_wave_1
|
0.2539
|
|
strong_partisan_wave_1
|
0.1934
|
|
substantive_ideology_scale_wave_1
|
0.1601
|
|
political_wave_1
|
0.1496
|
|
education
|
0.0684
|
|
birth_year
|
0.0585
|
|
freq_twitter_wave_1
|
0.0486
|
|
perfect_complier
|
0.0221
|
|
northeast
|
0.0214
|
|
west
|
0.0211
|
|
family_income
|
0.0199
|
|
half_complier
|
0.0192
|
|
friends_count_wave_1
|
0.0175
|
|
south
|
0.0153
|
|
bot_followers
|
0.0124
|
|
gender
|
0.0110
|
|
north_central
|
0.0058
|
7.Causal Forest
This section details the Causal Forest, which uses aggregated causal trees to assess treatment heterogeneity. The model is trained on the training data, and covariates are analyzed across trees.
Fitting the forest
# Adding a factor column turns all columns into character
data_train <- as.data.frame(apply(data_train, 2, as.numeric))
data_test <- as.data.frame(apply(data_test, 2, as.numeric))
library(grf)
##
## Attaching package: 'grf'
## The following object is masked from 'package:amlinear':
##
## average_partial_effect
cf <- causal_forest(
X = as.matrix(data_train[,covariates]),
Y = data_train$Y,
W = data_train$W,
num.trees=500)
Point estimates & standard errors
##Training set (OOB)
oob_pred <- predict(cf, estimate.variance=TRUE)
oob_tauhat_cf <- oob_pred$predictions
oob_tauhat_cf_se <- sqrt(oob_pred$variance.estimates)
##Test set
test_pred <- predict(cf, newdata=as.matrix(data_test[,covariates]), estimate.variance=TRUE)
tauhat_cf_test <- test_pred$predictions
tauhat_cf_test_se <- sqrt(test_pred$variance.estimates)
cf_known_prop <- grf::causal_forest(
X = as.matrix(data_train[covariates]),
Y = data_train$Y,
W = data_train$W,
W.hat = rep(mean(data_train$W), times=nrow(data_train)))
Table 7: Causal Forest Out of Bag Predictions
|
predictions
|
variance.estimates
|
debiased.error
|
|
0.0003
|
0.0030
|
0.0101
|
|
0.0272
|
0.0091
|
0.2438
|
|
-0.0049
|
0.0096
|
0.0688
|
Table 8: Causal Forest Test Set Predictions
|
predictions
|
variance.estimates
|
|
-0.0349
|
0.0019
|
|
-0.0289
|
0.0068
|
|
-0.0365
|
0.0143
|
Subgroup heterogeneity
# Manually creating subgroups
num_tiles <- 4 # ntiles = CATE is above / below the median
data_train$cate <- oob_tauhat_cf
data_train$ntile <- factor(ntile(oob_tauhat_cf, n=num_tiles))
ols_sample_ate <- lm("Y ~ ntile + ntile:W", data=data_train)
estimated_sample_ate <- coef(summary(ols_sample_ate))[(num_tiles+1):(2*num_tiles), c("Estimate", "Std. Error")]
hypothesis_sample_ate <- paste0("ntile1:W = ", paste0("ntile", seq(2, num_tiles), ":W"))
ftest_pvalue_sample_ate <- linearHypothesis(ols_sample_ate, hypothesis_sample_ate)[2,"Pr(>F)"]
# Not the most elegent way to make this table, but it works
sample_ate_table <- data.frame(cbind(estimated_sample_ate,c("-",hypothesis_sample_ate),c(ftest_pvalue_sample_ate)))
colnames(sample_ate_table)[3:4] = c("Hypotheses","F-Test")
sample_ate_table[,c(1,2,4)] <- sample_ate_table[,c(1,2,4)] %>%
unlist() %>% as.character() %>% as.numeric()
kable_styling(kable(sample_ate_table, "html", digits = 4, booktabs = TRUE,
caption = tab_title("Sample ATE")),
bootstrap_options=c("striped", "hover", "condensed", "responsive"),
full_width=FALSE)
Table 9: Sample ATE
|
|
Estimate
|
Std..Error
|
Hypotheses
|
F-Test
|
|
ntile1:W
|
0.1482
|
0.1853
|
|
0.9206
|
|
ntile2:W
|
0.0797
|
0.1884
|
ntile1:W = ntile2:W
|
0.9206
|
|
ntile3:W
|
-0.0359
|
0.1892
|
ntile1:W = ntile3:W
|
0.9206
|
|
ntile4:W
|
0.0666
|
0.1897
|
ntile1:W = ntile4:W
|
0.9206
|
Covariate heterogeneity
# Regress each covariate on ntile assignment to means p
cov_means <- lapply(covariates, function(covariate) {
lm(paste0(covariate, ' ~ 0 + ntile'), data = data_train)
})
# Extract the mean and standard deviation of each covariate per ntile
cov_table <- lapply(cov_means, function(cov_mean) {
as.data.frame(t(coef(summary(cov_mean))[,c("Estimate", "Std. Error")]))
})
Table 10: Average covariate values in each n-tile
|
covariates
|
ntile1
|
ntile2
|
ntile3
|
ntile4
|
|
percent_co_party
|
0.304
|
0.029
|
-0.278
|
0.08
|
|
|
(0.068)
|
(0.068)
|
(0.068)
|
(0.068)
|
|
ideo_homogeneity_offline
|
0.198
|
0.003
|
-0.123
|
-0.031
|
|
|
(0.069)
|
(0.069)
|
(0.069)
|
(0.069)
|
|
friends_count_wave_1
|
0.111
|
-0.102
|
0.005
|
0.018
|
|
|
(0.071)
|
(0.071)
|
(0.071)
|
(0.071)
|
|
education
|
0.087
|
-0.106
|
-0.046
|
0.143
|
|
|
(0.07)
|
(0.07)
|
(0.07)
|
(0.07)
|
|
family_income
|
-0.122
|
-0.133
|
0.033
|
0.221
|
|
|
(0.07)
|
(0.07)
|
(0.07)
|
(0.07)
|
|
birth_year
|
0.009
|
-0.08
|
0.055
|
-0.27
|
|
|
(0.068)
|
(0.068)
|
(0.068)
|
(0.068)
|
|
endtime_wave_5
|
0.325
|
0.045
|
-0.148
|
-0.323
|
|
|
(0.067)
|
(0.068)
|
(0.068)
|
(0.068)
|
|
substantive_ideology_scale_wave_1
|
-0.684
|
-0.366
|
0.116
|
0.691
|
|
|
(0.061)
|
(0.061)
|
(0.061)
|
(0.061)
|
|
gender
|
1.643
|
1.553
|
1.51
|
1.369
|
|
|
(0.034)
|
(0.034)
|
(0.034)
|
(0.034)
|
|
northeast
|
0.188
|
0.218
|
0.189
|
0.18
|
|
|
(0.028)
|
(0.028)
|
(0.028)
|
(0.028)
|
|
north_central
|
0.174
|
0.238
|
0.209
|
0.238
|
|
|
(0.029)
|
(0.029)
|
(0.029)
|
(0.029)
|
|
south
|
0.435
|
0.384
|
0.345
|
0.369
|
|
|
(0.034)
|
(0.034)
|
(0.034)
|
(0.034)
|
|
west
|
0.217
|
0.175
|
0.262
|
0.214
|
|
|
(0.029)
|
(0.029)
|
(0.029)
|
(0.029)
|
|
party_id_wave_1
|
1.184
|
1.301
|
1.476
|
1.684
|
|
|
(0.032)
|
(0.032)
|
(0.032)
|
(0.032)
|
|
political_wave_1
|
1.184
|
1.272
|
1.34
|
1.199
|
|
|
(0.03)
|
(0.03)
|
(0.03)
|
(0.03)
|
|
strong_partisan_wave_1
|
0.73
|
0.558
|
0.49
|
0.607
|
|
|
(0.034)
|
(0.034)
|
(0.034)
|
(0.034)
|
|
freq_twitter_wave_1
|
1.391
|
1.432
|
1.437
|
1.442
|
|
|
(0.034)
|
(0.034)
|
(0.034)
|
(0.034)
|
|
bot_followers
|
0.377
|
0.359
|
0.408
|
0.301
|
|
|
(0.033)
|
(0.033)
|
(0.033)
|
(0.033)
|
|
half_complier
|
0.242
|
0.267
|
0.291
|
0.218
|
|
|
(0.03)
|
(0.03)
|
(0.03)
|
(0.03)
|
|
perfect_complier
|
0.121
|
0.102
|
0.116
|
0.087
|
|
|
(0.021)
|
(0.022)
|
(0.022)
|
(0.022)
|
|
bin_maker
|
4.657
|
6.714
|
8.097
|
7.723
|
|
|
(0.332)
|
(0.332)
|
(0.332)
|
(0.332)
|
|
caseid
|
429.4
|
585.4
|
692.8
|
666.3
|
|
|
(24.29)
|
(24.35)
|
(24.35)
|
(24.35)
|
covariate_means_per_ntile <- aggregate(. ~ ntile, data_train, mean)[,covariates]
covariate_means <- apply(data_train, 2, mean)[covariates]
ntile_weights <- table(data_train$ntile) / dim(data_train)[1]
deviations <- t(apply(covariate_means_per_ntile, 1, function(x) x - covariate_means))
covariate_means_weighted_var <- apply(deviations, 2, function(x) sum(ntile_weights * x^2))
covariate_var <- apply(data_train, 2, var)[covariates]
cov_variation <- covariate_means_weighted_var / covariate_var
Table 11: Covariate variation across n-tiles
|
substantive_ideology_scale_wave_1
|
0.2614
|
|
party_id_wave_1
|
0.1474
|
|
caseid
|
0.0798
|
|
bin_maker
|
0.0730
|
|
endtime_wave_5
|
0.0580
|
|
percent_co_party
|
0.0431
|
|
gender
|
0.0391
|
|
strong_partisan_wave_1
|
0.0317
|
|
political_wave_1
|
0.0208
|
|
family_income
|
0.0201
|
|
birth_year
|
0.0159
|
|
ideo_homogeneity_offline
|
0.0138
|
|
education
|
0.0099
|
|
bot_followers
|
0.0065
|
|
west
|
0.0056
|
|
friends_count_wave_1
|
0.0055
|
|
south
|
0.0046
|
|
north_central
|
0.0041
|
|
half_complier
|
0.0039
|
|
perfect_complier
|
0.0018
|
|
freq_twitter_wave_1
|
0.0016
|
|
northeast
|
0.0014
|
tc <- test_calibration(cf)
Table 12: Best linear fit using forest predictions (on held-out data) as well as the mean forest prediction as regressors, along with heteroskedasticity-robust (HC3) SEs.
|
Estimate
|
Std. Error
|
t value
|
Pr(>|t|)
|
|
-0.712611
|
2.650675
|
-0.268841
|
0.788119
|
|
-7.282025
|
1.226377
|
-5.937838
|
0.000000
|
8.Learners
This section shows the creation of the various learner estimates, as well as comparisons and combinations of these estimates.
S-Learner
df <- as.data.frame(apply(df, 2, as.numeric))
data_train <- as.data.frame(apply(data_train, 2, as.numeric))
data_test <- as.data.frame(apply(data_test, 2, as.numeric))
X = as.matrix(data_train[,covariates])
W = data_train$W
Y = data_train$Y
sf = regression_forest(cbind(X, W), Y)
pred.sf.0 = predict(sf, cbind(X, 0))$predictions
pred.sf.1 = predict(sf, cbind(X, 1))$predictions
preds.sf.oob = predict(sf)$predictions
pred.sf.0[W==0] = preds.sf.oob[W==0]
pred.sf.1[W==1] = preds.sf.oob[W==1]
preds.sf = pred.sf.1 - pred.sf.0
# estimate test data
pred.sf.0_test = predict(sf, cbind(data_test, 0))$predictions
pred.sf.1_test = predict(sf, cbind(data_test, 1))$predictions
tauhat_s1_test = pred.sf.1_test - pred.sf.0_test
T-Learner
tf0 = regression_forest(X[W==0,], Y[W==0])
tf1 = regression_forest(X[W==1,], Y[W==1])
tf.preds.0 = predict(tf0, X)$predictions
tf.preds.1 = predict(tf1, X)$predictions
tf.preds.0[W==0] = predict(tf0)$predictions #OOB
tf.preds.1[W==1] = predict(tf1)$predictions #OOB
preds.tf = tf.preds.1 - tf.preds.0
# estimate test data
tf.preds.0_test = predict(tf0,data_test)$predictions
tf.preds.1_test = predict(tf1,data_test)$predictions
tauhat_t1_test = tf.preds.1_test - tf.preds.0_test
X-Learner
tf0 = regression_forest(X[W==0,], Y[W==0])
yhat0 = predict(tf0, X[W==1,])$predictions
xf1 = regression_forest(X[W==1,], Y[W==1]-yhat0)
xf.preds.1 = predict(xf1, X)$predictions
# this line ensures we make OOB predictions when appropriate
xf.preds.1[W==1] = predict(xf1)$predictions
tf1 = regression_forest(X[W==1,], Y[W==1])
yhat1 = predict(tf1, X[W==0,])$predictions
xf0 = regression_forest(X[W==0,], yhat1-Y[W==0])
xf.preds.0 = predict(xf0, X)$predictions
# this line ensures we make OOB predictions when appropriate
xf.preds.0[W==0] = predict(xf0)$predictions
propf = regression_forest(X, W, tune.parameters = TRUE)
ehat = predict(propf)$predictions
preds.xf = (1 - ehat) * xf.preds.1 + ehat * xf.preds.0
# estimate in test data
preds_x1_test.1 = predict(xf1, data_test)$predictions
preds_x1_test.0 = predict(xf0, data_test)$predictions
ehat_test = predict(propf, data_test)$predictions
tauhat_x1_test = (1 - ehat_test) * preds_x1_test.1 + ehat_test * preds_x1_test.0
Compare MSE
#Comparing predictions
# Compute Y-star
p <- mean(data_test$W)
Y_star <- ((data_test$W - p)/(p*(1-p)))*data_test$Y
# Compute the sample average treatment effect to use as a baseline comparison
tauhat_sample_ate <- with(data_train, mean(Y[W==1]) - mean(Y[W==0]))
#Compute test MSE for all methods
mse <- data.frame(
Sample_ATE_Loss = (Y_star - tauhat_sample_ate)^2,
S_Learner_Loss = (Y_star - tauhat_s1_test)^2,
T_Learner_Loss = (Y_star - tauhat_t1_test)^2,
X_Learner_Loss = (Y_star - tauhat_x1_test)^2,
Causal_Tree_Loss = (Y_star - tauhat_ct_test)^2,
Causal_Forest_Loss = (Y_star - tauhat_cf_test)^2)
mse_summary1 <- describe(mse)[, c('mean', 'se')]
kable_styling(kable(mse_summary1, "html", digits = 4, row.names=TRUE,
caption=tab_title("MSE Across Estimators"), booktabs = TRUE),
bootstrap_options=c("striped", "hover", "condensed", "responsive"),
full_width=FALSE)
Table 13: MSE Across Estimators
|
|
mean
|
se
|
|
Sample_ATE_Loss
|
64.9801
|
4.9635
|
|
S_Learner_Loss
|
64.9465
|
4.8664
|
|
T_Learner_Loss
|
64.8882
|
4.9159
|
|
X_Learner_Loss
|
65.1182
|
4.9221
|
|
Causal_Tree_Loss
|
65.6354
|
5.1290
|
|
Causal_Forest_Loss
|
64.9631
|
4.8694
|
Compare Mean Estimated Treatment Effects
ates <- data.frame(
Sample_ATE = rep(tauhat_sample_ate, nrow(data_train)),
S_Learner_ATE = preds.sf,
T_Learner_ATE = preds.tf,
X_Learner_ATE = preds.xf,
Causal_Forest_ATE= oob_tauhat_cf
)
ate_sum <- describe(ates)[, c('mean', 'se')]
kable_styling(kable(ate_sum, "html", digits = 4, row.names=TRUE,
caption=tab_title("ATE Across Estimators"), booktabs = TRUE),
bootstrap_options=c("striped", "hover", "condensed", "responsive"),
full_width=FALSE)
Table 14: ATE Across Estimators
|
|
mean
|
se
|
|
Sample_ATE
|
0.1244
|
0.0000
|
|
S_Learner_ATE
|
0.0003
|
0.0016
|
|
T_Learner_ATE
|
0.0107
|
0.0044
|
|
X_Learner_ATE
|
0.0051
|
0.0017
|
|
Causal_Forest_ATE
|
-0.0152
|
0.0011
|
R Loss Function
Y.forest.test = regression_forest(X = as.matrix(data_test[covariates]), Y = data_test$Y)
Y.hat.test = predict(Y.forest.test)$predictions
W.forest.test = regression_forest(X = as.matrix(data_test[covariates]), Y = data_test$W)
W.hat.test = predict(W.forest.test)$predictions
mse_rloss <- data.frame(
Sample_ATE_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_sample_ate)^2,
S_Learner_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_s1_test)^2,
T_Learner_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_t1_test)^2,
X_Learner_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_x1_test)^2,
Causal_Tree_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_ct_test)^2,
Causal_Forest_Loss = (data_test$Y - Y.hat.test - (data_test$W - W.hat.test) * tauhat_cf_test)^2)
mse_rloss_summary <- describe(mse_rloss)[, c('mean', 'se')]
kable_styling(kable(mse_rloss_summary, "html", digits = 4, row.names=TRUE,
caption=tab_title("MSE R Loss Across Estimators"), booktabs = TRUE),
bootstrap_options=c("striped", "hover", "condensed", "responsive"),
full_width=FALSE)
Table 15: MSE R Loss Across Estimators
|
|
mean
|
se
|
|
Sample_ATE_Loss
|
0.2000
|
0.0204
|
|
S_Learner_Loss
|
0.2026
|
0.0204
|
|
T_Learner_Loss
|
0.2031
|
0.0204
|
|
X_Learner_Loss
|
0.2013
|
0.0204
|
|
Causal_Tree_Loss
|
0.2102
|
0.0222
|
|
Causal_Forest_Loss
|
0.2026
|
0.0204
|
Stacking
library(nnls)
## Stacking test
prob = 0.6
NREP = 50
RESP = data_train$Y - cf$Y.hat
R.mat = cbind(1, data_train$W - prob,
(data_train$W - prob) * oob_tauhat_cf,
(data_train$W - prob) * preds.xf)
stack = nnls(R.mat, RESP)
print("coefs")
## [1] "coefs"
print(stack)
## Nonnegative least squares model
## x estimates: 0 0.006282432 0 0
## residual sum-of-squares: 158.6
## reason terminated: The solution has been computed sucessfully.
stack.results = data.frame(stack$x)
rownames(stack.results) = c("Intercept","W-p","Causal Forest","X Learner")
kable_styling(kable(stack.results, "html", digits = 4, row.names=TRUE,
caption=tab_title("Stacking Coefficients"), booktabs = TRUE),
bootstrap_options=c("striped", "hover", "condensed", "responsive"),
full_width=FALSE)
Table 16: Stacking Coefficients
|
|
stack.x
|
|
Intercept
|
0.0000
|
|
W-p
|
0.0063
|
|
Causal Forest
|
0.0000
|
|
X Learner
|
0.0000
|