library(openxlsx)
library(stringr) 
library(caTools) 
library(car)
library(foreign) 
library(readr)
library(dplyr)
library(tidyr)
library(survey)
library(haven)
library(gmodels)
library(randomForest)
library(vita)
library(ggplot2)

This project explores a basic application of the logistic (logit) regression machine learning algorithm. The project is for practice only; the data were drawn from the 2020 American National Election Studies (ANES) survey.

This is an attempt to create an algorithm that predicts the log odds of partisan preference using demographic characteristics and news media consumption by broadcast/platform type and program/network.

# call data
# Path to the ANES 2020 time-series Stata file. The default is the original
# location; set the ANES2020_DTA environment variable to read it from
# somewhere else without editing this script.
anes_path <- Sys.getenv(
  "ANES2020_DTA",
  unset = "C:\\Users\\Jaire\\OneDrive\\Desktop\\Exploratory Research\\Data\\ANES2020T.dta"
)
# fail early with a readable message instead of a reader-level error
if (!file.exists(anes_path)) {
  stop("ANES 2020 data file not found: ", anes_path, call. = FALSE)
}
a20 <- read_dta(anes_path)

Features & Transformations

# sex: two 0/1 indicators built from V201600 (code -9 becomes NA)
#   male   keeps code 1 and maps code 2 to 0
#   female maps code 2 to 1 and code 1 to 0
a20[["male"]] <- car::recode(a20[["V201600"]], recodes = "2=0;-9=NA")
a20[["female"]] <- car::recode(a20[["V201600"]], recodes = "2=1;1=0;-9=NA")
table(a20[["male"]])
## 
##    0    1 
## 4450 3763
table(a20[["female"]])
## 
##    0    1 
## 3763 4450
# race non-Hispanic, Hispanic
# One 0/1 indicator per category of V201549x. Codes -9:-1 become NA in every
# column; the nh.* columns additionally set code 3 to NA, while Hispanic
# codes 3 as 1 and every other category as 0.
a20 <- local({
  race_specs <- c(
    nh.white = "2=0;3=NA;4:6=0;-9:-1=NA",
    nh.black = "1=0;2=1;3=NA;4:6=0;-9:-1=NA",
    Hispanic = "1:2=0;3=1;4:6=0;-9:-1=NA",
    nh.ANHPI = "1:2=0;3=NA;4=1;5:6=0;-9:-1=NA",
    nh.NAAN = "1:2=0;3=NA;4=0;5=1;6=0;-9:-1=NA",
    nh.multr = "1:2=0;3=NA;4:5=0;6=1;-9:-1=NA"
  )
  coded <- a20
  for (col in names(race_specs)) {
    coded[[col]] <- car::recode(coded$V201549x, recodes = race_specs[[col]])
  }
  coded
})

table(a20$nh.white)
## 
##    0    1 
## 1453 5963
table(a20$nh.black)
## 
##    0    1 
## 6690  726
table(a20$Hispanic)
## 
##    0    1 
## 7416  762
table(a20$nh.ANHPI)
## 
##    0    1 
## 7132  284
table(a20$nh.NAAN)
## 
##    0    1 
## 7244  172
table(a20$nh.multr)
## 
##    0    1 
## 7145  271
# age
# `age` is V201507x with codes -9:-1 set to NA; the generation columns are
# 0/1 indicators for the age bands 18-23, 24-39, 40-55, 56-74 and 75-80.
a20 <- local({
  age_specs <- c(
    age = "-9:-1=NA",
    GenZ = "18:23=1;24:80=0;-9:-1=NA",
    GenY = "24:39=1;18:23=0;40:80=0;-9:-1=NA",
    GenX = "40:55=1;18:39=0;56:80=0;-9:-1=NA",
    BoomerGen = "56:74=1;18:55=0;75:80=0;-9:-1=NA",
    SilentGen = "75:80=1;18:74=0;-9:-1=NA"
  )
  coded <- a20
  for (col in names(age_specs)) {
    coded[[col]] <- car::recode(coded$V201507x, recodes = age_specs[[col]])
  }
  coded
})
table(a20$age)
## 
##  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37 
##  35  52  46  51  57  75  92 104 108 132 120 131 142 109 117 123 142 152 144 149 
##  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57 
## 152 151 139 151 113 116 111 116 119 106 105 123 154 128 111 117 123 140 127 136 
##  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77 
## 145 154 168 139 154 156 155 180 170 142 140 158 126 147 145 147  94  93  89  81 
##  78  79  80 
##  64  63 403
# income
# Four 0/1 indicators cut from the V202468x categories
# (1-5, 6-10, 11-16, 17-22); codes -9:-1 become NA.
a20 <- local({
  income_specs <- c(
    inc.under30k = "1:5=1;6:22=0;-9:-1=NA",
    inc.30_59.9k = "1:5=0;6:10=1;11:22=0;-9:-1=NA",
    inc.60_99.9k = "1:10=0;11:16=1;17:22=0;-9:-1=NA",
    inc.100_250K = "1:16=0;17:22=1;-9:-1=NA"
  )
  coded <- a20
  for (col in names(income_specs)) {
    coded[[col]] <- car::recode(coded$V202468x, recodes = income_specs[[col]])
  }
  coded
})
table(a20$inc.under30k)
## 
##    0    1 
## 6106 1874
table(a20$inc.30_59.9k)
## 
##    0    1 
## 6242 1738
table(a20$inc.60_99.9k)
## 
##    0    1 
## 6164 1816
table(a20$inc.100_250K)
## 
##    0    1 
## 5428 2552
# education
# One 0/1 indicator per V201511x category 1-5; codes -9:-2 become NA.
a20 <- local({
  education_specs <- c(
    Lsh = "-9:-2=NA;2:5=0",
    Hs = "-9:-2=NA;1=0;2=1;3:5=0",
    Hsc = "-9:-2=NA;1:2=0;3=1;4:5=0",
    Cdg = "-9:-2=NA;1:3=0;4=1;5=0",
    Gdg = "-9:-2=NA;1:4=0;5=1"
  )
  coded <- a20
  for (col in names(education_specs)) {
    coded[[col]] <- car::recode(coded$V201511x, recodes = education_specs[[col]])
  }
  coded
})
table(a20$Lsh)
## 
##    0    1 
## 7773  376
table(a20$Hs)
## 
##    0    1 
## 6813 1336
table(a20$Hsc)
## 
##    0    1 
## 5359 2790
table(a20$Cdg)
## 
##    0    1 
## 6094 2055
table(a20$Gdg)
## 
##    0    1 
## 6557 1592
# party id
# 0/1 indicators from V201228: dem flags code 1, rep flags code 2,
# ind flags code 3; codes -9:-4 become NA.
a20 <- local({
  party_specs <- c(
    dem = "-9:-4=NA;2:5=0",
    rep = "-9:-4=NA;1=0;2=1;3:5=0",
    ind = "-9:-4=NA;1:2=0;3=1;5=0"
  )
  coded <- a20
  for (col in names(party_specs)) {
    coded[[col]] <- car::recode(coded$V201228, recodes = party_specs[[col]])
  }
  coded
})
table(a20$dem)
## 
##    0    1 
## 5366 2865
table(a20$rep)
## 
##    0    1 
## 5668 2563
table(a20$ind)
## 
##    0    1 
## 5704 2527
# marital status
# 0/1 indicators from V201508: married flags codes 1-2, then one indicator
# each for codes 3, 4, 5 and 6; codes -9:-1 become NA.
a20 <- local({
  marital_specs <- c(
    married = "-9:-1=NA;1:2=1;3:6=0",
    widowed = "-9:-1=NA;1:2=0;3=1;4:6=0",
    divorced = "-9:-1=NA;1:3=0;4=1;5:6=0",
    separated = "-9:-1=NA;1:4=0;5=1;6=0",
    nvmarried = "-9:-1=NA;1:5=0;6=1"
  )
  coded <- a20
  for (col in names(marital_specs)) {
    coded[[col]] <- car::recode(coded$V201508, recodes = marital_specs[[col]])
  }
  coded
})
table(a20$married)
## 
##    0    1 
## 3902 4322
table(a20$widowed)
## 
##    0    1 
## 7657  567
table(a20$divorced)
## 
##    0    1 
## 7003 1221
table(a20$separated)
## 
##    0    1 
## 8061  163
table(a20$nvmarried)
## 
##    0    1 
## 6273 1951
# News-media exposure items. Every new column is a copy of its ANES source
# variable with codes -9:-1 set to NA; the lookup below maps each new column
# name to the source variable it is built from, in creation order.
a20 <- local({
  media_sources <- c(
    # TV news program use in English
    tveng.COLBERT = "V201630a",
    tveng.HANNITY = "V201630b",
    tveng.TCARLSON = "V201630c",
    tveng.RMADDOW = "V201630d",
    tveng.LODONNELL = "V201630e",
    tveng.THEFIVE = "V201630f",
    tveng.THEINGRAHAMANGLE = "V201630g",
    tveng.MACCALLUM = "V201630h",
    tveng.JTAPPER = "V201630i",
    tveng.ACOOPER = "V201630j",
    tveng.BBAIER = "V201630k",
    tveng.CBSODONNELL = "V201630m",
    tveng.ABCWORLDNEWS = "V201630n",
    tveng.NBCLESTERHOLT = "V201630p",
    tveng.CUOMO = "V201630q",
    tveng.EBURNETT = "V201631b",
    tveng.SIXTYMINUTES = "V201631c",
    tveng.TWENTY20 = "V201631d",
    tveng.DATELINE = "V201631e",
    tveng.FACETHENATION = "V201631f",
    tveng.MEETTHEPRESS = "V201631g",
    tveng.CBSTHISMORNING = "V201631h",
    tveng.GOODMORNINGAMERICA = "V201631i",
    tveng.TODAY = "V201631j",
    tveng.FOXFRIENDS = "V201631k",
    tveng.MORNINGJOE = "V201631m",
    tveng.CHAYES = "V201631n",
    tveng.BWILLIAMS = "V201631p",
    tveng.PBSNEWSHOUR = "V201631q",
    # TV news programs in Spanish
    tvesp.ALPUNTO = "V201632a",
    tvesp.ALROJOVIVO = "V201632b",
    tvesp.DESPIERTAAMERICA = "V201632f",
    tvesp.HOY = "V201632h",
    tvesp.LOMEJORDECASOCERRADO = "V201632i",
    tvesp.NOTICIEROESTRELLATV = "V201632j",
    tvesp.NOTICIEROTELEMUNDO = "V201632k",
    tvesp.NOTICIEROUNIVISION = "V201632m",
    tvesp.UNNUEVODIA = "V201632n",
    tvesp.PRIMERIMPACTO = "V201632p",
    # radio news programs in English
    reng.RUSHLIMBAUGH = "V201633a",
    reng.SEANHANNITYSHOW = "V201633b",
    reng.NPRMARKETPLACE = "V201633c",
    reng.NPRALLTHINGSCONSIDERED = "V201633d",
    reng.DAVERAMSEY = "V201633e",
    reng.NPRMORNINGEDITION = "V201633f",
    reng.MARKLEVIN = "V201633g",
    reng.GLENNBECK = "V201633h",
    reng.MIKEGALLAGHER = "V201633i",
    reng.HUGHHEWITT = "V201633j",
    reng.THESAVAGENATION = "V201633k",
    reng.THEDANASHOW = "V201633m",
    reng.THOMHARTMANN = "V201633n",
    reng.JIMBOHANNON = "V201633p",
    reng.NPRFRESHAIR = "V201633q",
    # news website
    nweb.YAHOONEWS = "V201634a",
    nweb.CNNNEWS = "V201634b",
    nweb.HUFFINGTONPOST = "V201634c",
    nweb.NYTIMES = "V201634d",
    nweb.BREITBART = "V201634e",
    nweb.FOXNEWS = "V201634f",
    nweb.WASHINGTONPOST = "V201634g",
    nweb.THEGUARDIAN = "V201634h",
    nweb.USATODAY = "V201634i",
    nweb.BBCNEWS = "V201634j",
    nweb.NPRNEWS = "V201634k",
    nweb.DAILYCALLER = "V201634m",
    nweb.BLOOMBERG = "V201634n",
    nweb.BUZZFEED = "V201634p",
    nweb.NBCNEWS = "V201634q",
    # printed news paper
    npprint.NYTIMES = "V201635a",
    npprint.USATODAY = "V201635b",
    npprint.WSJ = "V201635c",
    npprint.WASHPOST = "V201635d",
    # web news paper
    npweb.NYTIMES = "V201636a",
    npweb.USATODAY = "V201636b",
    npweb.WSJ = "V201636c",
    npweb.WASHPOST = "V201636d"
  )
  coded <- a20
  for (col in names(media_sources)) {
    source_var <- media_sources[[col]]
    coded[[col]] <- car::recode(coded[[source_var]], recodes = "-9:-1=NA")
  }
  coded
})
# recode NA's as "0" for ML algorithm
# NOTE(review): this zero-fills every column of a20, including the target
# `rep`, so respondents whose answer was recoded to NA are treated as 0 by the
# model -- confirm that is intended.
a20[is.na(a20)] <- 0

Assess Data

# subset, w/o V200001 (Case ID)
# Keeps the recoded modelling columns only, in the order listed.
a20sub <- dplyr::select(
  a20,
  # sex and generation
  male, GenZ, GenY, GenX, BoomerGen, SilentGen,
  # race / ethnicity
  nh.white, nh.black, Hispanic, nh.ANHPI, nh.NAAN, nh.multr,
  # income
  inc.under30k, inc.30_59.9k, inc.60_99.9k, inc.100_250K,
  # education
  Lsh, Hs, Hsc, Cdg, Gdg,
  # party id (used later as the model target)
  rep,
  # marital status
  married, widowed, divorced, separated, nvmarried,
  # TV news programs in English
  tveng.COLBERT, tveng.HANNITY, tveng.TCARLSON, tveng.RMADDOW,
  tveng.LODONNELL, tveng.THEFIVE, tveng.THEINGRAHAMANGLE, tveng.MACCALLUM,
  tveng.JTAPPER, tveng.ACOOPER, tveng.BBAIER, tveng.CBSODONNELL,
  tveng.ABCWORLDNEWS, tveng.NBCLESTERHOLT, tveng.CUOMO, tveng.EBURNETT,
  tveng.SIXTYMINUTES, tveng.TWENTY20, tveng.DATELINE, tveng.FACETHENATION,
  tveng.MEETTHEPRESS, tveng.CBSTHISMORNING, tveng.GOODMORNINGAMERICA,
  tveng.TODAY, tveng.FOXFRIENDS, tveng.MORNINGJOE, tveng.CHAYES,
  tveng.BWILLIAMS, tveng.PBSNEWSHOUR,
  # TV news programs in Spanish
  tvesp.ALPUNTO, tvesp.ALROJOVIVO, tvesp.DESPIERTAAMERICA, tvesp.HOY,
  tvesp.LOMEJORDECASOCERRADO, tvesp.NOTICIEROESTRELLATV,
  tvesp.NOTICIEROTELEMUNDO, tvesp.NOTICIEROUNIVISION, tvesp.UNNUEVODIA,
  tvesp.PRIMERIMPACTO,
  # radio news programs in English
  reng.RUSHLIMBAUGH, reng.SEANHANNITYSHOW, reng.NPRMARKETPLACE,
  reng.NPRALLTHINGSCONSIDERED, reng.DAVERAMSEY, reng.NPRMORNINGEDITION,
  reng.MARKLEVIN, reng.GLENNBECK, reng.MIKEGALLAGHER, reng.HUGHHEWITT,
  reng.THESAVAGENATION, reng.THEDANASHOW, reng.THOMHARTMANN,
  reng.JIMBOHANNON, reng.NPRFRESHAIR,
  # news websites
  nweb.YAHOONEWS, nweb.CNNNEWS, nweb.HUFFINGTONPOST, nweb.NYTIMES,
  nweb.BREITBART, nweb.FOXNEWS, nweb.WASHINGTONPOST, nweb.THEGUARDIAN,
  nweb.USATODAY, nweb.BBCNEWS, nweb.NPRNEWS, nweb.DAILYCALLER,
  nweb.BLOOMBERG, nweb.BUZZFEED, nweb.NBCNEWS,
  # printed news papers
  npprint.NYTIMES, npprint.USATODAY, npprint.WSJ, npprint.WASHPOST,
  # web news papers
  npweb.NYTIMES, npweb.USATODAY, npweb.WSJ, npweb.WASHPOST
)
# check columns
names(a20sub)
##   [1] "male"                        "GenZ"                       
##   [3] "GenY"                        "GenX"                       
##   [5] "BoomerGen"                   "SilentGen"                  
##   [7] "nh.white"                    "nh.black"                   
##   [9] "Hispanic"                    "nh.ANHPI"                   
##  [11] "nh.NAAN"                     "nh.multr"                   
##  [13] "inc.under30k"                "inc.30_59.9k"               
##  [15] "inc.60_99.9k"                "inc.100_250K"               
##  [17] "Lsh"                         "Hs"                         
##  [19] "Hsc"                         "Cdg"                        
##  [21] "Gdg"                         "rep"                        
##  [23] "married"                     "widowed"                    
##  [25] "divorced"                    "separated"                  
##  [27] "nvmarried"                   "tveng.COLBERT"              
##  [29] "tveng.HANNITY"               "tveng.TCARLSON"             
##  [31] "tveng.RMADDOW"               "tveng.LODONNELL"            
##  [33] "tveng.THEFIVE"               "tveng.THEINGRAHAMANGLE"     
##  [35] "tveng.MACCALLUM"             "tveng.JTAPPER"              
##  [37] "tveng.ACOOPER"               "tveng.BBAIER"               
##  [39] "tveng.CBSODONNELL"           "tveng.ABCWORLDNEWS"         
##  [41] "tveng.NBCLESTERHOLT"         "tveng.CUOMO"                
##  [43] "tveng.EBURNETT"              "tveng.SIXTYMINUTES"         
##  [45] "tveng.TWENTY20"              "tveng.DATELINE"             
##  [47] "tveng.FACETHENATION"         "tveng.MEETTHEPRESS"         
##  [49] "tveng.CBSTHISMORNING"        "tveng.GOODMORNINGAMERICA"   
##  [51] "tveng.TODAY"                 "tveng.FOXFRIENDS"           
##  [53] "tveng.MORNINGJOE"            "tveng.CHAYES"               
##  [55] "tveng.BWILLIAMS"             "tveng.PBSNEWSHOUR"          
##  [57] "tvesp.ALPUNTO"               "tvesp.ALROJOVIVO"           
##  [59] "tvesp.DESPIERTAAMERICA"      "tvesp.HOY"                  
##  [61] "tvesp.LOMEJORDECASOCERRADO"  "tvesp.NOTICIEROESTRELLATV"  
##  [63] "tvesp.NOTICIEROTELEMUNDO"    "tvesp.NOTICIEROUNIVISION"   
##  [65] "tvesp.UNNUEVODIA"            "tvesp.PRIMERIMPACTO"        
##  [67] "reng.RUSHLIMBAUGH"           "reng.SEANHANNITYSHOW"       
##  [69] "reng.NPRMARKETPLACE"         "reng.NPRALLTHINGSCONSIDERED"
##  [71] "reng.DAVERAMSEY"             "reng.NPRMORNINGEDITION"     
##  [73] "reng.MARKLEVIN"              "reng.GLENNBECK"             
##  [75] "reng.MIKEGALLAGHER"          "reng.HUGHHEWITT"            
##  [77] "reng.THESAVAGENATION"        "reng.THEDANASHOW"           
##  [79] "reng.THOMHARTMANN"           "reng.JIMBOHANNON"           
##  [81] "reng.NPRFRESHAIR"            "nweb.YAHOONEWS"             
##  [83] "nweb.CNNNEWS"                "nweb.HUFFINGTONPOST"        
##  [85] "nweb.NYTIMES"                "nweb.BREITBART"             
##  [87] "nweb.FOXNEWS"                "nweb.WASHINGTONPOST"        
##  [89] "nweb.THEGUARDIAN"            "nweb.USATODAY"              
##  [91] "nweb.BBCNEWS"                "nweb.NPRNEWS"               
##  [93] "nweb.DAILYCALLER"            "nweb.BLOOMBERG"             
##  [95] "nweb.BUZZFEED"               "nweb.NBCNEWS"               
##  [97] "npprint.NYTIMES"             "npprint.USATODAY"           
##  [99] "npprint.WSJ"                 "npprint.WASHPOST"           
## [101] "npweb.NYTIMES"               "npweb.USATODAY"             
## [103] "npweb.WSJ"                   "npweb.WASHPOST"
# check for NA's after the conversion by printing the subset
# (the tibble print shows the first rows and each column's type)
print(a20sub)
## # A tibble: 8,280 × 104
##    male        GenZ  GenY  GenX  Boome…¹ Silen…² nh.wh…³ nh.bl…⁴ Hispa…⁵ nh.AN…⁶
##    <dbl+lbl>   <dbl> <dbl> <dbl> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+l> <dbl+l>
##  1 1 [1. Male] 0     0     1     0       0       0       0       1 [1. … 0      
##  2 0           0     1     0     0       0       0       0       0       1 [1. …
##  3 0           0     0     1     0       0       1 [1. … 0       0       0      
##  4 1 [1. Male] 0     0     1     0       0       0       0       0       1 [1. …
##  5 1 [1. Male] 0     0     0     1       0       0       0       0       0      
##  6 0           0     0     0     1       0       1 [1. … 0       0       0      
##  7 0           0     1     0     0       0       1 [1. … 0       0       0      
##  8 0           0     0     1     0       0       1 [1. … 0       0       0      
##  9 0           0     0     0     1       0       1 [1. … 0       0       0      
## 10 1 [1. Male] 0     0     1     0       0       0       0       1 [1. … 0      
## # … with 8,270 more rows, 94 more variables: nh.NAAN <dbl+lbl>,
## #   nh.multr <dbl+lbl>, inc.under30k <dbl+lbl>, inc.30_59.9k <dbl+lbl>,
## #   inc.60_99.9k <dbl+lbl>, inc.100_250K <dbl+lbl>, Lsh <dbl+lbl>,
## #   Hs <dbl+lbl>, Hsc <dbl+lbl>, Cdg <dbl+lbl>, Gdg <dbl+lbl>, rep <dbl+lbl>,
## #   married <dbl+lbl>, widowed <dbl+lbl>, divorced <dbl+lbl>,
## #   separated <dbl+lbl>, nvmarried <dbl+lbl>, tveng.COLBERT <dbl+lbl>,
## #   tveng.HANNITY <dbl+lbl>, tveng.TCARLSON <dbl+lbl>, …

Train Model

# turn the first 101 columns of a20sub into factors
# NOTE(review): a20sub has 104 columns, so this index leaves npweb.USATODAY,
# npweb.WSJ and npweb.WASHPOST out of a20sub_f (and therefore out of the
# model) -- confirm whether dropping them is intended.
a20sub_f <- as.data.frame(lapply(a20sub[seq_len(101)], factor))
# check factor count
sum(vapply(a20sub_f, is.factor, logical(1)))
## [1] 101
# examine variable levels
sapply(a20sub_f, levels)
##      male GenZ GenY GenX BoomerGen SilentGen nh.white nh.black Hispanic
## [1,] "0"  "0"  "0"  "0"  "0"       "0"       "0"      "0"      "0"     
## [2,] "1"  "1"  "1"  "1"  "1"       "1"       "1"      "1"      "1"     
##      nh.ANHPI nh.NAAN nh.multr inc.under30k inc.30_59.9k inc.60_99.9k
## [1,] "0"      "0"     "0"      "0"          "0"          "0"         
## [2,] "1"      "1"     "1"      "1"          "1"          "1"         
##      inc.100_250K Lsh Hs  Hsc Cdg Gdg rep married widowed divorced separated
## [1,] "0"          "0" "0" "0" "0" "0" "0" "0"     "0"     "0"      "0"      
## [2,] "1"          "1" "1" "1" "1" "1" "1" "1"     "1"     "1"      "1"      
##      nvmarried tveng.COLBERT tveng.HANNITY tveng.TCARLSON tveng.RMADDOW
## [1,] "0"       "0"           "0"           "0"            "0"          
## [2,] "1"       "1"           "1"           "1"            "1"          
##      tveng.LODONNELL tveng.THEFIVE tveng.THEINGRAHAMANGLE tveng.MACCALLUM
## [1,] "0"             "0"           "0"                    "0"            
## [2,] "1"             "1"           "1"                    "1"            
##      tveng.JTAPPER tveng.ACOOPER tveng.BBAIER tveng.CBSODONNELL
## [1,] "0"           "0"           "0"          "0"              
## [2,] "1"           "1"           "1"          "1"              
##      tveng.ABCWORLDNEWS tveng.NBCLESTERHOLT tveng.CUOMO tveng.EBURNETT
## [1,] "0"                "0"                 "0"         "0"           
## [2,] "1"                "1"                 "1"         "1"           
##      tveng.SIXTYMINUTES tveng.TWENTY20 tveng.DATELINE tveng.FACETHENATION
## [1,] "0"                "0"            "0"            "0"                
## [2,] "1"                "1"            "1"            "1"                
##      tveng.MEETTHEPRESS tveng.CBSTHISMORNING tveng.GOODMORNINGAMERICA
## [1,] "0"                "0"                  "0"                     
## [2,] "1"                "1"                  "1"                     
##      tveng.TODAY tveng.FOXFRIENDS tveng.MORNINGJOE tveng.CHAYES tveng.BWILLIAMS
## [1,] "0"         "0"              "0"              "0"          "0"            
## [2,] "1"         "1"              "1"              "1"          "1"            
##      tveng.PBSNEWSHOUR tvesp.ALPUNTO tvesp.ALROJOVIVO tvesp.DESPIERTAAMERICA
## [1,] "0"               "0"           "0"              "0"                   
## [2,] "1"               "1"           "1"              "1"                   
##      tvesp.HOY tvesp.LOMEJORDECASOCERRADO tvesp.NOTICIEROESTRELLATV
## [1,] "0"       "0"                        "0"                      
## [2,] "1"       "1"                        "1"                      
##      tvesp.NOTICIEROTELEMUNDO tvesp.NOTICIEROUNIVISION tvesp.UNNUEVODIA
## [1,] "0"                      "0"                      "0"             
## [2,] "1"                      "1"                      "1"             
##      tvesp.PRIMERIMPACTO reng.RUSHLIMBAUGH reng.SEANHANNITYSHOW
## [1,] "0"                 "0"               "0"                 
## [2,] "1"                 "1"               "1"                 
##      reng.NPRMARKETPLACE reng.NPRALLTHINGSCONSIDERED reng.DAVERAMSEY
## [1,] "0"                 "0"                         "0"            
## [2,] "1"                 "1"                         "1"            
##      reng.NPRMORNINGEDITION reng.MARKLEVIN reng.GLENNBECK reng.MIKEGALLAGHER
## [1,] "0"                    "0"            "0"            "0"               
## [2,] "1"                    "1"            "1"            "1"               
##      reng.HUGHHEWITT reng.THESAVAGENATION reng.THEDANASHOW reng.THOMHARTMANN
## [1,] "0"             "0"                  "0"              "0"              
## [2,] "1"             "1"                  "1"              "1"              
##      reng.JIMBOHANNON reng.NPRFRESHAIR nweb.YAHOONEWS nweb.CNNNEWS
## [1,] "0"              "0"              "0"            "0"         
## [2,] "1"              "1"              "1"            "1"         
##      nweb.HUFFINGTONPOST nweb.NYTIMES nweb.BREITBART nweb.FOXNEWS
## [1,] "0"                 "0"          "0"            "0"         
## [2,] "1"                 "1"          "1"            "1"         
##      nweb.WASHINGTONPOST nweb.THEGUARDIAN nweb.USATODAY nweb.BBCNEWS
## [1,] "0"                 "0"              "0"           "0"         
## [2,] "1"                 "1"              "1"           "1"         
##      nweb.NPRNEWS nweb.DAILYCALLER nweb.BLOOMBERG nweb.BUZZFEED nweb.NBCNEWS
## [1,] "0"          "0"              "0"            "0"           "0"         
## [2,] "1"          "1"              "1"            "1"           "1"         
##      npprint.NYTIMES npprint.USATODAY npprint.WSJ npprint.WASHPOST
## [1,] "0"             "0"              "0"         "0"             
## [2,] "1"             "1"              "1"         "1"             
##      npweb.NYTIMES
## [1,] "0"          
## [2,] "1"
# target vector (party inclination): class shares in percent, then raw counts
round(100 * prop.table(table(a20sub_f[["rep"]])), digits = 1)
## 
##  0  1 
## 69 31
table(a20sub_f[["rep"]])
## 
##    0    1 
## 5717 2563
# fix the RNG seed, then flag rows for the training set (SplitRatio = 0.7)
set.seed(123)
sampleSplit <- sample.split(Y = a20sub_f$rep, SplitRatio = 0.7)
# flagged rows form the training set, the remaining rows the test set
trainSet <- a20sub_f[sampleSplit, ]
testSet <- a20sub_f[!sampleSplit, ]
# train model: logistic regression of `rep` on every other column
train_model <- glm(rep ~ ., family = binomial(link = "logit"), data = trainSet)

Evaluate Model

# view train_model: coefficient table (estimates on the log-odds scale),
# null/residual deviance and AIC
summary(train_model)
## 
## Call:
## glm(formula = rep ~ ., family = binomial(link = "logit"), data = trainSet)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8573  -0.8430  -0.3608   0.8236   3.3816  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -1.02446    0.45095  -2.272 0.023101 *  
## male1                         0.08503    0.06818   1.247 0.212341    
## GenZ1                        -0.43237    0.26673  -1.621 0.105018    
## GenY1                        -0.18043    0.18914  -0.954 0.340103    
## GenX1                        -0.20294    0.18640  -1.089 0.276270    
## BoomerGen1                   -0.20467    0.18160  -1.127 0.259728    
## SilentGen1                   -0.17383    0.20198  -0.861 0.389431    
## nh.white1                     0.73558    0.32302   2.277 0.022774 *  
## nh.black1                    -1.70728    0.39265  -4.348 1.37e-05 ***
## Hispanic1                    -0.08732    0.34897  -0.250 0.802408    
## nh.ANHPI1                    -0.16090    0.37842  -0.425 0.670694    
## nh.NAAN1                     -0.25878    0.41560  -0.623 0.533509    
## nh.multr1                     0.05441    0.37556   0.145 0.884804    
## inc.under30k1                 0.02983    0.19291   0.155 0.877121    
## inc.30_59.9k1                 0.12027    0.19097   0.630 0.528820    
## inc.60_99.9k1                 0.13929    0.19061   0.731 0.464926    
## inc.100_250K1                 0.24098    0.18943   1.272 0.203322    
## Lsh1                          0.05805    0.32642   0.178 0.858850    
## Hs1                           0.03325    0.29671   0.112 0.910783    
## Hsc1                          0.00653    0.29125   0.022 0.982112    
## Cdg1                          0.03443    0.29388   0.117 0.906742    
## Gdg1                         -0.49740    0.29880  -1.665 0.095985 .  
## married1                      0.16246    0.43551   0.373 0.709128    
## widowed1                     -0.04891    0.45128  -0.108 0.913694    
## divorced1                    -0.15619    0.44161  -0.354 0.723572    
## separated1                   -0.48653    0.49755  -0.978 0.328150    
## nvmarried1                   -0.40744    0.44209  -0.922 0.356726    
## tveng.COLBERT1                0.05174    0.10789   0.480 0.631580    
## tveng.HANNITY1               -0.01554    0.11608  -0.134 0.893512    
## tveng.TCARLSON1              -0.01286    0.11771  -0.109 0.912967    
## tveng.RMADDOW1               -0.14042    0.11894  -1.181 0.237757    
## tveng.LODONNELL1              0.13512    0.11731   1.152 0.249393    
## tveng.THEFIVE1                0.05704    0.12057   0.473 0.636164    
## tveng.THEINGRAHAMANGLE1      -0.17246    0.12019  -1.435 0.151314    
## tveng.MACCALLUM1              0.06469    0.11686   0.554 0.579866    
## tveng.JTAPPER1                0.10232    0.12252   0.835 0.403676    
## tveng.ACOOPER1               -0.04489    0.12078  -0.372 0.710138    
## tveng.BBAIER1                -0.15824    0.12277  -1.289 0.197430    
## tveng.CBSODONNELL1           -0.02543    0.12398  -0.205 0.837481    
## tveng.ABCWORLDNEWS1          -0.20103    0.12831  -1.567 0.117189    
## tveng.NBCLESTERHOLT1         -0.03719    0.12038  -0.309 0.757394    
## tveng.CUOMO1                 -0.22494    0.12378  -1.817 0.069182 .  
## tveng.EBURNETT1              -0.19659    0.12674  -1.551 0.120872    
## tveng.SIXTYMINUTES1           0.24404    0.12846   1.900 0.057464 .  
## tveng.TWENTY201              -0.06919    0.12760  -0.542 0.587644    
## tveng.DATELINE1              -0.07598    0.13098  -0.580 0.561835    
## tveng.FACETHENATION1         -0.08545    0.12654  -0.675 0.499500    
## tveng.MEETTHEPRESS1           0.01667    0.12789   0.130 0.896271    
## tveng.CBSTHISMORNING1         0.23420    0.12332   1.899 0.057545 .  
## tveng.GOODMORNINGAMERICA1    -0.21146    0.13045  -1.621 0.105013    
## tveng.TODAY1                 -0.15055    0.13351  -1.128 0.259472    
## tveng.FOXFRIENDS1             0.05170    0.13096   0.395 0.693010    
## tveng.MORNINGJOE1             0.19120    0.12681   1.508 0.131622    
## tveng.CHAYES1                 0.09177    0.12683   0.724 0.469306    
## tveng.BWILLIAMS1              0.04871    0.12797   0.381 0.703495    
## tveng.PBSNEWSHOUR1            0.02502    0.12790   0.196 0.844890    
## tvesp.ALPUNTO1               -1.11647    0.67237  -1.660 0.096815 .  
## tvesp.ALROJOVIVO1             0.27002    0.46098   0.586 0.558052    
## tvesp.DESPIERTAAMERICA1      -0.65782    0.74513  -0.883 0.377329    
## tvesp.HOY1                   -1.08281    0.80573  -1.344 0.178987    
## tvesp.LOMEJORDECASOCERRADO1   0.26501    0.56140   0.472 0.636888    
## tvesp.NOTICIEROESTRELLATV1   -0.17651    0.61138  -0.289 0.772803    
## tvesp.NOTICIEROTELEMUNDO1    -0.94931    0.85211  -1.114 0.265249    
## tvesp.NOTICIEROUNIVISION1     0.10864    0.61541   0.177 0.859874    
## tvesp.UNNUEVODIA1            -0.15733    0.62534  -0.252 0.801363    
## tvesp.PRIMERIMPACTO1          0.83269    0.56133   1.483 0.137965    
## reng.RUSHLIMBAUGH1            0.66721    0.18959   3.519 0.000433 ***
## reng.SEANHANNITYSHOW1         0.88759    0.22704   3.909 9.25e-05 ***
## reng.NPRMARKETPLACE1         -0.23999    0.34628  -0.693 0.488283    
## reng.NPRALLTHINGSCONSIDERED1 -1.19794    0.27736  -4.319 1.57e-05 ***
## reng.DAVERAMSEY1              0.64991    0.21887   2.969 0.002984 ** 
## reng.NPRMORNINGEDITION1      -0.38783    0.26643  -1.456 0.145478    
## reng.MARKLEVIN1              -0.05471    0.25189  -0.217 0.828058    
## reng.GLENNBECK1               0.30628    0.25537   1.199 0.230395    
## reng.MIKEGALLAGHER1           0.09193    0.50616   0.182 0.855875    
## reng.HUGHHEWITT1             -0.63683    0.57020  -1.117 0.264053    
## reng.THESAVAGENATION1        -0.25395    0.36655  -0.693 0.488421    
## reng.THEDANASHOW1             1.27181    0.67625   1.881 0.060014 .  
## reng.THOMHARTMANN1           -0.88742    1.24173  -0.715 0.474818    
## reng.JIMBOHANNON1             1.49925    0.88565   1.693 0.090488 .  
## reng.NPRFRESHAIR1            -0.32233    0.31311  -1.029 0.303266    
## nweb.YAHOONEWS1               0.28859    0.11670   2.473 0.013401 *  
## nweb.CNNNEWS1                -1.08988    0.13382  -8.145 3.81e-16 ***
## nweb.HUFFINGTONPOST1         -0.45550    0.19450  -2.342 0.019186 *  
## nweb.NYTIMES1                -0.34058    0.16643  -2.046 0.040714 *  
## nweb.BREITBART1              -0.02746    0.26384  -0.104 0.917107    
## nweb.FOXNEWS1                 1.50829    0.11328  13.314  < 2e-16 ***
## nweb.WASHINGTONPOST1         -0.21349    0.17568  -1.215 0.224292    
## nweb.THEGUARDIAN1             0.02575    0.23083   0.112 0.911189    
## nweb.USATODAY1                0.18340    0.15887   1.154 0.248333    
## nweb.BBCNEWS1                -0.30381    0.17219  -1.764 0.077672 .  
## nweb.NPRNEWS1                -0.44318    0.18946  -2.339 0.019326 *  
## nweb.DAILYCALLER1             1.33638    0.42729   3.128 0.001763 ** 
## nweb.BLOOMBERG1               0.09916    0.21594   0.459 0.646087    
## nweb.BUZZFEED1               -0.30753    0.18041  -1.705 0.088275 .  
## nweb.NBCNEWS1                -0.21677    0.17163  -1.263 0.206594    
## npprint.NYTIMES1             -0.66161    0.30062  -2.201 0.027748 *  
## npprint.USATODAY1             0.15814    0.22538   0.702 0.482893    
## npprint.WSJ1                  0.63070    0.24399   2.585 0.009739 ** 
## npprint.WASHPOST1            -0.19973    0.30774  -0.649 0.516317    
## npweb.NYTIMES1               -0.87311    0.20486  -4.262 2.03e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 7172.2  on 5795  degrees of freedom
## Residual deviance: 5578.2  on 5695  degrees of freedom
## AIC: 5780.2
## 
## Number of Fisher Scoring iterations: 6

News media associated with a significant INCREASE in the log odds of leaning Republican are:

-News Radio: reng.RUSHLIMBAUGH1, reng.SEANHANNITYSHOW1, reng.DAVERAMSEY1

-News Website: nweb.FOXNEWS1(#1), nweb.YAHOONEWS1, nweb.DAILYCALLER1

-News Paper: npprint.WSJ1

-Demographics: non-Hispanic white race

News media associated with a significant DECREASE in the log odds of leaning Republican are:

-News Radio: reng.NPRALLTHINGSCONSIDERED1(#1)

-News Website: nweb.CNNNEWS1, nweb.HUFFINGTONPOST1, nweb.NYTIMES1, nweb.NPRNEWS1

-News Paper: npprint.NYTIMES1

-News Paper/Website: npweb.NYTIMES1

-Demographics: non-Hispanic black race

# predicted probabilities for the test rows, then 0/1 classes using a
# 0.5 cut-off
probabs <- predict(train_model, newdata = testSet, type = "response")
preds <- ifelse(probabs > 0.5, 1, 0)
# confusion matrix: cell counts and their share of the table total
c.matrix <- CrossTable(
  x = testSet$rep,
  y = preds,
  prop.chisq = FALSE,
  prop.c = FALSE,
  prop.r = FALSE,
  dnn = c('actual Republican leaning', 'predicted Republican leaning')
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  2484 
## 
##  
##                           | predicted Republican leaning 
## actual Republican leaning |         0 |         1 | Row Total | 
## --------------------------|-----------|-----------|-----------|
##                         0 |      1575 |       140 |      1715 | 
##                           |     0.634 |     0.056 |           | 
## --------------------------|-----------|-----------|-----------|
##                         1 |       506 |       263 |       769 | 
##                           |     0.204 |     0.106 |           | 
## --------------------------|-----------|-----------|-----------|
##              Column Total |      2081 |       403 |      2484 | 
## --------------------------|-----------|-----------|-----------|
## 
## 

The model classified 63.4% of the test observations as true negatives (not likely to be Republican leaning), 5.6% as false positives, 20.4% as false negatives, and 10.6% as true positives (likely to be Republican leaning). The model has an accuracy rate of 74% and an error rate of 26%.

library(pscl)
## Warning: package 'pscl' was built under R version 4.2.2
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
# Gauge the logit model's goodness of fit via McFadden's pseudo-R-squared
pscl::pR2(object = train_model)["McFadden"]
## fitting null model for pseudo-r2
##  McFadden 
## 0.2222478

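McFadden's pseudo-R-squared is roughly 0.222: with the features included within the a20sub_f data frame, the fitted model improves the log-likelihood by about 22.2 percent relative to an intercept-only (null) model. Unlike an OLS R-squared, this is not literally the share of variation in the target vector that is explained. The model fit would be low for more serious projects.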

Assess Features by Importance

# Fit a random forest classifier of "rep" on every other column of trainSet,
# keeping the permutation-based importance measures (importance = TRUE).
train_model_vip <- randomForest(rep ~ ., data = trainSet, importance = TRUE)
# Run the PIMP permutation step with a single permutation of the response
# (S = 1). X must hold the predictors only: passing the whole trainSet would
# feed "rep" itself into the permuted forests as a feature and add a spurious
# "rep" row to the permutation importances.
predictor_cols <- setdiff(names(trainSet), "rep")
v.imp <- PIMP(
  X = trainSet[, predictor_cols, drop = FALSE],
  y = trainSet$rep,
  rForest = train_model_vip,
  S = 1,
  parallel = FALSE,
  ncores = 0,
  seed = 123
)
# examine results (original and permutation variable importances)
v.imp
## Call:
##  
## PIMP.default(X = trainSet, y = trainSet$rep, rForest = train_model_vip, 
##     S = 1, parallel = FALSE, ncores = 0, seed = 123)
## type: [1] "classification"
## 
## original VarImp:
##                                    VarImp
## male                         1.202125e-04
## GenZ                        -8.146028e-05
## GenY                         5.817062e-04
## GenX                         1.281823e-04
## BoomerGen                    6.992017e-04
## SilentGen                    1.510397e-04
## nh.white                     6.514752e-03
## nh.black                     4.379054e-03
## Hispanic                    -3.139630e-04
## nh.ANHPI                    -3.112118e-04
## nh.NAAN                     -9.750886e-05
## nh.multr                    -4.305180e-04
## inc.under30k                 3.302921e-03
## inc.30_59.9k                 6.245872e-04
## inc.60_99.9k                 1.056864e-03
## inc.100_250K                 2.587691e-03
## Lsh                          4.253950e-04
## Hs                           1.333309e-03
## Hsc                          1.666440e-03
## Cdg                          1.581044e-03
## Gdg                          2.638222e-03
## married                      5.134158e-03
## widowed                     -1.260446e-04
## divorced                     1.759841e-03
## separated                   -1.113131e-04
## nvmarried                    4.679203e-03
## tveng.COLBERT                2.894606e-03
## tveng.HANNITY                2.588379e-03
## tveng.TCARLSON               2.329922e-03
## tveng.RMADDOW                1.205315e-03
## tveng.LODONNELL              2.796188e-03
## tveng.THEFIVE                2.109967e-03
## tveng.THEINGRAHAMANGLE       1.745627e-03
## tveng.MACCALLUM              2.849806e-03
## tveng.JTAPPER                1.845346e-03
## tveng.ACOOPER                1.456450e-03
## tveng.BBAIER                 7.200097e-04
## tveng.CBSODONNELL            2.033985e-03
## tveng.ABCWORLDNEWS           7.457345e-04
## tveng.NBCLESTERHOLT          2.522942e-03
## tveng.CUOMO                  1.631536e-03
## tveng.EBURNETT               1.217994e-03
## tveng.SIXTYMINUTES           1.985092e-03
## tveng.TWENTY20               1.146606e-03
## tveng.DATELINE               1.480185e-03
## tveng.FACETHENATION          1.074103e-03
## tveng.MEETTHEPRESS           1.370159e-03
## tveng.CBSTHISMORNING         1.222926e-03
## tveng.GOODMORNINGAMERICA     1.112795e-03
## tveng.TODAY                  9.473975e-04
## tveng.FOXFRIENDS             1.226240e-03
## tveng.MORNINGJOE             1.950863e-03
## tveng.CHAYES                 1.090464e-03
## tveng.BWILLIAMS              2.203413e-03
## tveng.PBSNEWSHOUR            2.160141e-03
## tvesp.ALPUNTO                2.589907e-05
## tvesp.ALROJOVIVO            -5.546878e-05
## tvesp.DESPIERTAAMERICA      -4.084543e-05
## tvesp.HOY                   -4.903172e-05
## tvesp.LOMEJORDECASOCERRADO  -7.728455e-05
## tvesp.NOTICIEROESTRELLATV   -1.792622e-06
## tvesp.NOTICIEROTELEMUNDO     7.906643e-06
## tvesp.NOTICIEROUNIVISION    -5.457258e-05
## tvesp.UNNUEVODIA            -8.317339e-05
## tvesp.PRIMERIMPACTO         -4.048362e-05
## reng.RUSHLIMBAUGH            8.495386e-03
## reng.SEANHANNITYSHOW         8.708638e-03
## reng.NPRMARKETPLACE         -1.339517e-03
## reng.NPRALLTHINGSCONSIDERED -1.562947e-03
## reng.DAVERAMSEY              1.831190e-03
## reng.NPRMORNINGEDITION      -8.945115e-04
## reng.MARKLEVIN               4.322447e-03
## reng.GLENNBECK               2.128466e-03
## reng.MIKEGALLAGHER           1.062269e-04
## reng.HUGHHEWITT              9.391757e-05
## reng.THESAVAGENATION         2.878322e-04
## reng.THEDANASHOW             1.074267e-04
## reng.THOMHARTMANN           -5.459673e-06
## reng.JIMBOHANNON             1.782467e-05
## reng.NPRFRESHAIR            -5.680513e-05
## nweb.YAHOONEWS               6.190279e-04
## nweb.CNNNEWS                -1.774753e-03
## nweb.HUFFINGTONPOST         -3.428093e-03
## nweb.NYTIMES                -3.546159e-03
## nweb.BREITBART               1.635673e-03
## nweb.FOXNEWS                 2.022386e-02
## nweb.WASHINGTONPOST         -2.109227e-03
## nweb.THEGUARDIAN            -5.187124e-04
## nweb.USATODAY                5.317483e-04
## nweb.BBCNEWS                 2.576091e-03
## nweb.NPRNEWS                -2.111976e-03
## nweb.DAILYCALLER             2.578940e-04
## nweb.BLOOMBERG               9.395018e-04
## nweb.BUZZFEED               -1.137865e-03
## nweb.NBCNEWS                -7.642632e-04
## npprint.NYTIMES             -1.273899e-03
## npprint.USATODAY             1.644671e-04
## npprint.WSJ                  1.533923e-04
## npprint.WASHPOST            -8.240470e-05
## npweb.NYTIMES               -4.669262e-03
## 
## permutation VarImp:
##                                      [,1]
## male                         4.972836e-05
## GenZ                        -3.202312e-05
## GenY                         5.219197e-04
## GenX                        -1.716082e-04
## BoomerGen                    1.927641e-04
## SilentGen                   -2.062405e-04
## nh.white                     2.288811e-04
## nh.black                    -1.343985e-04
## Hispanic                     2.855012e-04
## nh.ANHPI                    -2.022958e-04
## nh.NAAN                     -4.247370e-05
## nh.multr                    -1.586490e-04
## inc.under30k                 6.116821e-04
## inc.30_59.9k                 2.287183e-04
## inc.60_99.9k                 1.531390e-04
## inc.100_250K                 9.148486e-04
## Lsh                         -1.161979e-04
## Hs                           9.953773e-04
## Hsc                          8.435593e-04
## Cdg                          6.083576e-04
## Gdg                          8.098471e-04
## rep                          4.516369e-04
## married                      8.125889e-04
## widowed                      5.707566e-04
## divorced                     3.918397e-04
## separated                    5.214545e-05
## nvmarried                    1.575817e-04
## tveng.COLBERT                2.879366e-04
## tveng.HANNITY                5.160922e-04
## tveng.TCARLSON               9.204444e-04
## tveng.RMADDOW                2.742878e-04
## tveng.LODONNELL              9.327867e-04
## tveng.THEFIVE                1.681578e-04
## tveng.THEINGRAHAMANGLE       1.907993e-04
## tveng.MACCALLUM              4.700246e-04
## tveng.JTAPPER                4.661881e-04
## tveng.ACOOPER                1.695648e-03
## tveng.BBAIER                 1.422701e-03
## tveng.CBSODONNELL            5.752794e-04
## tveng.ABCWORLDNEWS           5.532496e-04
## tveng.NBCLESTERHOLT          1.411572e-03
## tveng.CUOMO                  5.228775e-04
## tveng.EBURNETT               1.178129e-03
## tveng.SIXTYMINUTES           1.531403e-03
## tveng.TWENTY20               7.092205e-04
## tveng.DATELINE               5.400833e-04
## tveng.FACETHENATION          7.691613e-05
## tveng.MEETTHEPRESS           9.238418e-04
## tveng.CBSTHISMORNING         3.967587e-04
## tveng.GOODMORNINGAMERICA     5.416110e-04
## tveng.TODAY                  1.935160e-04
## tveng.FOXFRIENDS             1.215673e-03
## tveng.MORNINGJOE             1.266819e-03
## tveng.CHAYES                 1.627723e-03
## tveng.BWILLIAMS              6.366629e-04
## tveng.PBSNEWSHOUR            1.117881e-03
## tvesp.ALPUNTO                5.521142e-05
## tvesp.ALROJOVIVO             3.328859e-07
## tvesp.DESPIERTAAMERICA       1.979430e-05
## tvesp.HOY                    1.856058e-04
## tvesp.LOMEJORDECASOCERRADO  -9.459808e-06
## tvesp.NOTICIEROESTRELLATV    3.653558e-05
## tvesp.NOTICIEROTELEMUNDO     6.375937e-04
## tvesp.NOTICIEROUNIVISION     1.687096e-05
## tvesp.UNNUEVODIA            -7.862901e-06
## tvesp.PRIMERIMPACTO         -8.551228e-07
## reng.RUSHLIMBAUGH           -6.686720e-05
## reng.SEANHANNITYSHOW         1.472007e-04
## reng.NPRMARKETPLACE          3.444723e-04
## reng.NPRALLTHINGSCONSIDERED  7.096634e-04
## reng.DAVERAMSEY             -1.670318e-04
## reng.NPRMORNINGEDITION       6.602190e-04
## reng.MARKLEVIN               1.586070e-04
## reng.GLENNBECK              -9.027497e-05
## reng.MIKEGALLAGHER           1.695212e-04
## reng.HUGHHEWITT             -8.277655e-06
## reng.THESAVAGENATION         7.785791e-06
## reng.THEDANASHOW             1.010309e-06
## reng.THOMHARTMANN           -1.415716e-05
## reng.JIMBOHANNON            -5.546549e-06
## reng.NPRFRESHAIR             5.823757e-04
## nweb.YAHOONEWS               4.107355e-05
## nweb.CNNNEWS                 4.763294e-04
## nweb.HUFFINGTONPOST          1.134595e-03
## nweb.NYTIMES                 1.792342e-03
## nweb.BREITBART               5.461463e-05
## nweb.FOXNEWS                 5.176909e-05
## nweb.WASHINGTONPOST          1.842233e-03
## nweb.THEGUARDIAN             4.537737e-04
## nweb.USATODAY                9.027628e-04
## nweb.BBCNEWS                 1.946635e-03
## nweb.NPRNEWS                 9.713848e-04
## nweb.DAILYCALLER             1.667970e-05
## nweb.BLOOMBERG               1.286231e-03
## nweb.BUZZFEED                1.049028e-03
## nweb.NBCNEWS                -1.997094e-04
## npprint.NYTIMES              3.195385e-04
## npprint.USATODAY             7.186627e-04
## npprint.WSJ                 -9.284456e-05
## npprint.WASHPOST             6.646830e-04
## npweb.NYTIMES                7.022618e-04

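Among the original estimates of variable importance, “nweb.FOXNEWS” has the largest value (0.0202) and “npweb.NYTIMES” has the most negative value (-0.0047). These two features line up with the logit results above, where nweb.FOXNEWS increases and npweb.NYTIMES decreases the log odds of leaning Republican. Note, however, that random forest importance measures a feature's contribution to predictive accuracy rather than the direction of its effect, so the sign of an importance value should not by itself be read as a +/- change in the log odds.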

I copy the original rendering and paste it into Notepad. This arranges the contents so that they can be cleanly pasted into an Excel spreadsheet. I then paste the contents into Excel using the import wizard as a data frame containing two columns: the feature name and its corresponding original importance value. This is a practical way to overcome some of the difficulties of manipulating the contents of the “v.imp” object (PIMP object). I only include the most and least important features for graphical display.

# Load the hand-assembled spreadsheet holding the selected v.imp importances
MostLeastImp <- openxlsx::read.xlsx(
  xlsxFile = "C:\\Users\\Jaire\\OneDrive\\Desktop\\Exploratory Research\\ML\\v.imp_contents_MostLeast.xlsx"
)
# Show the imported rows
print(MostLeastImp)
##    ID                     Feature        VarImp
## 1   1           reng.RUSHLIMBAUGH  0.0084953860
## 2   2        reng.SEANHANNITYSHOW  0.0087086380
## 3   3             reng.DAVERAMSEY  0.0018311900
## 4   4                nweb.FOXNEWS  0.0202238600
## 5   5              nweb.YAHOONEWS  0.0006190279
## 6   6            nweb.DAILYCALLER  0.0002578940
## 7   7                 npprint.WSJ  0.0001533923
## 8   8                    nh.white  0.0065147520
## 9   9 reng.NPRALLTHINGSCONSIDERED -0.0015629470
## 10 10                nweb.CNNNEWS -0.0017747530
## 11 11         nweb.HUFFINGTONPOST -0.0034280930
## 12 12                nweb.NYTIMES -0.0035461590
## 13 13                nweb.NPRNEWS -0.0021119760
## 14 14             npprint.NYTIMES -0.0012738990
## 15 15               npweb.NYTIMES -0.0046692620
## 16 16                    nh.black  0.0043790540
# Scatter plot of the selected importances: one point per row of
# MostLeastImp, positioned by ID and VarImp and coloured by Feature
ggplot(MostLeastImp, aes(ID, VarImp)) +
  geom_point(aes(colour = Feature), alpha = 1)

# Bar chart of the same values: bar height is VarImp itself (no counting),
# with one fill colour per Feature
ggplot(data = MostLeastImp, mapping = aes(ID, VarImp, fill = Feature)) +
  geom_col()