Setup

Libraries and Function

library(tidyverse)
library(haven) # read_dta()
library(magrittr) # %<>%
library(labelled) # var_label()
library(knitr) # kabel()
library(DT) # datatable()
library(GGally) # ggpairs()
library(moderndive) # get_regression...
library(fastDummies) # dummy_cols()
library(gvlma) # gvlma()

# removes numbering from factor levels
level_tidy <-
  function(variable){
    variable %>% 
      str_split(" ", n = 2) %>%
      map_chr(2)
  }

Plot Zooming

https://stackoverflow.com/a/59401761

Data

Load

https://electionstudies.org/data-center/2016-time-series-study/

anes2016 <-
  read_dta("G:/My Drive/homework/Andrea P/Project 2016/anes_timeseries_2016.dta",
           col_select = c(V161095, V161096, V161241, V161126, V161155, V161342))

anes2016 %<>% 
  mutate(DemocraticThermometer = V161095, # Dependent Variable
         RepublicanThermometer = V161096, # Dependent Variable
         Religion = V161241,
         Ideology = V161126, 
         PartyID = V161155,
         Gender = V161342,
         .keep = "unused"
         )

Tables

# Dropped negative levels
anes2016 %>%
  pivot_longer(names_to = "Variable",
               values_to = "DroppedLevel",
               cols = everything()
               ) %>%
  filter(DroppedLevel < 0) %>%
  table() %>% 
  addmargins() %>%
  kable(caption = "Count of Negative Levels")
Count of Negative Levels
-99 -89 -88 -9 -8 Sum
DemocraticThermometer 49 6 15 0 0 70
Gender 0 0 0 41 0 41
Ideology 0 0 0 18 5 23
PartyID 0 0 0 15 10 25
Religion 0 0 0 21 6 27
RepublicanThermometer 64 5 17 0 0 86
Sum 113 11 32 95 21 272
# All levels (thermometers are mixed continuous and categorical)
options(knitr.kable.NA = '')

anes2016 %>%
  select(-DemocraticThermometer, -RepublicanThermometer) %>%
  pivot_longer(names_to = "Variable",
               values_to = "Level",
               cols = everything()
               ) ->
  df

anes2016 %>%
  select(DemocraticThermometer, RepublicanThermometer) %>% 
  pivot_longer(names_to = "Variable",
               values_to = "Level",
               cols = everything()
               ) %>%
  filter(Level < 0) %>%
  bind_rows(df) %>%
  group_by(Variable, Level) %>%
  summarize(Percentage = n() / nrow(anes2016) * 100) %>% # divisor = # category obs
  pivot_wider(names_from = Level, values_from = Percentage) %>%
  kable(digits = 1, caption = "Level % of Category Observations")
Level % of Category Observations
Variable -99 -89 -88 -9 1 2 3 -8 4 5 6 7 99 0
DemocraticThermometer 1.1 0.1 0.4
Gender 1.0 46.5 52.2 0.3
Ideology 0.4 3.4 11.9 8.9 0.1 20.9 11.9 16.5 3.9 22.1
PartyID 0.4 34.0 28.8 32.0 0.2 3.5 1.1
Religion 0.5 65.1 34.2 0.1
RepublicanThermometer 1.5 0.1 0.4
rm(df)  

Factor Level Transformations

anes2016 %<>% 
   filter(if_all(.cols = everything(),
                 .fns = ~ .x >= 0) # drop all negative codes (levels)
         ) %>%
  mutate(across(.cols = c(-DemocraticThermometer, -RepublicanThermometer), 
                .fns = as_factor), # categorical data
         across(.cols = c(-DemocraticThermometer, -RepublicanThermometer), 
                .fns = fct_drop), # drop empty levels
         across(.cols = c(-DemocraticThermometer, -RepublicanThermometer), 
                .fns = ~ fct_relabel(.x, ~ level_tidy(.x))), # remove level numbering
         PartyID = fct_other(PartyID, # combine levels
                             drop = c("No preference (FTF ONLY)",
                                      "Other party SPECIFY"), 
                             other_level = "PartyID_Other"),
         PartyID = fct_relevel(PartyID, "PartyID_Other", after = 0), # reference level
         Ideology = fct_collapse(Ideology, # collapse levels
                                 Liberal = c("Extremely liberal",
                                             "Liberal",
                                             "Slightly liberal"),
                                 Moderate = "Moderate, middle of the road",
                                 Conservative = c("Slightly conservative",
                                                  "Conservative",
                                                  "Extremely conservative"),
                                 Ideology_Other = 
                          "Haven't thought much about this (FTF ONLY: DO NOT PROBE)"),
         Ideology = fct_relevel(Ideology, "Ideology_Other", after = 0), # ref level
         Gender = fct_relevel(Gender, "Other", after = 0), # reference level
         Religion = fct_relevel(Religion, # reference level
                                           "Not important",
                                           after = 0
                                           )
         )

# Possible additions:
# Income: V161361x
# Location: V163001b, V163002, V163003, V163004

# Removed: V161264x (religion summary) replaced by V161241
# Removed: V161158x (Party ID) replaced  by V161155
# Dropped: V161137 (income gap), V161019 (insignificant p-values for all levels)

Code Book

# V161095 Feeling Thermometer: Democratic Party
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=209

# V161096 PRE: Feeling Thermometer: Republican Party
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=210

# V161241 PRE: Is religion important part of R life
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=422

# V161126 PRE: 7pt scale Liberal conservative self-placement
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=243

# V161137 PRE: Income gap today more or less than 20 years ago
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=261

# V161155: PRE: Party ID: Does R think of self as Dem, Rep, Ind or what
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=292


# https://stackoverflow.com/a/54282131
data.frame(
  "name" = names(anes2016),
  "label" = sapply(anes2016,
                   function(x) var_label(x)
                   ) %>% as.character()
  ) %>%
  kable()
name label
DemocraticThermometer PRE: Feeling Thermometer: Democratic Party
RepublicanThermometer PRE: Feeling Thermometer: Republican Party
Religion PRE: Is religion important part of R life
Ideology PRE: 7pt scale Liberal conservative self-placement
PartyID PRE: Party ID: Does R think of self as Dem, Rep, Ind or what
Gender PRE FTF CASI / WEB: R self-identified gender

EDA

anes2016 %>% summary()
##  DemocraticThermometer RepublicanThermometer          Religion   
##  Min.   :  0.00        Min.   :  0.00        Not important:1411  
##  1st Qu.: 22.00        1st Qu.: 17.00        Important    :2675  
##  Median : 50.00        Median : 50.00                            
##  Mean   : 47.96        Mean   : 43.56                            
##  3rd Qu.: 70.00        3rd Qu.: 60.00                            
##  Max.   :100.00        Max.   :100.00                            
##            Ideology             PartyID        Gender    
##  Ideology_Other: 882   PartyID_Other: 182   Other :  10  
##  Liberal       :1003   Democrat     :1398   Male  :1926  
##  Moderate      : 859   Republican   :1206   Female:2150  
##  Conservative  :1342   Independent  :1300                
##                                                          
## 
anes2016 %>% datatable(options = list(pageLength = 5))
anes2016 %>% ggpairs()

anes2016 %>%
  ggplot(aes(y = DemocraticThermometer, fill = Gender, color = Religion)) +
  geom_boxplot(varwidth = TRUE) +
  facet_grid(rows = vars(Ideology), cols = vars(PartyID)) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

anes2016 %>%
  ggplot(aes(y = DemocraticThermometer, fill = Religion, color = Gender)) +
  geom_boxplot(varwidth = TRUE) +
  facet_grid(rows = vars(Ideology), cols = vars(PartyID)) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

anes2016 %>%
  ggplot(aes(x = DemocraticThermometer,
             y = RepublicanThermometer,
             color = Gender,
             shape = Gender)) + 
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(rows = vars(Ideology), cols = vars(PartyID))

anes2016 %>%
  ggplot(aes(x = DemocraticThermometer, 
             y = RepublicanThermometer,
             shape = Religion,
             color = Religion)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  facet_grid(rows = vars(Ideology), cols = vars(PartyID))

anes2016 %>%
  select(DemocraticThermometer, RepublicanThermometer) %>%
  pivot_longer(names_to = "Variable",
               values_to = "Value",
               cols = everything()) %>%
  group_by(Variable) %>%
  ggplot(aes(sample = (Value))) +
  geom_qq() +
  geom_qq_line(aes(color = "red")) +
  facet_wrap(~Variable) +
  ggtitle("QQ-Plot of Response (Dependent) Variables") +
  theme(plot.title = element_text(hjust = 0.5))

anes2016 %>%
  select(DemocraticThermometer, RepublicanThermometer) %>%
  pivot_longer(names_to = "Variable",
               values_to = "Value",
               cols = everything()) %>%
  group_by(Variable) %>%
  ggplot(aes(y = Value)) +
  geom_boxplot() +
  facet_wrap(~Variable) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

Linear Models

Democratic Thermometer

DemocraticThermometer.model <-
  anes2016 %>%
  select(-RepublicanThermometer) %>%
  lm(DemocraticThermometer ~ ., data = .)

DemocraticThermometer.coef <-
  DemocraticThermometer.model %>%
  get_regression_table() %>%
  select(term, estimate, p_value) %>%
  mutate(Democratic_Estimate = estimate,
         Democratic_P_Value = p_value,
         .keep = "unused")

DemocraticThermometer.summary <-
  DemocraticThermometer.model %>%
  get_regression_summaries()

# summary(model, correlation = TRUE))

Republican Thermometer

RepublicanThermometer.model <-
  anes2016 %>%
  select(-DemocraticThermometer) %>%
  lm(RepublicanThermometer ~ ., data = .)

RepublicanThermometer.coef <-
  RepublicanThermometer.model  %>%
  get_regression_table() %>%
  select(term, estimate, p_value) %>%
  mutate(Republican_Estimate = estimate,
         Republican_P_Value = p_value,
         .keep = "unused")

RepublicanThermometer.summary <-
  RepublicanThermometer.model %>%
  get_regression_summaries()

Assumptions & Analysis

http://r-statistics.co/Assumptions-of-Linear-Regression.html

Democratic Thermormeter

anes2016 %>%
  select(-RepublicanThermometer) %>%
  slice(568, 1238, 1548, 3386, 3506, 3731) %>%
  kable(caption = "Observations to Notice")
Observations to Notice
DemocraticThermometer Religion Ideology PartyID Gender
100 Important Conservative Republican Male
40 Not important Liberal Democrat Other
0 Important Liberal Democrat Female
100 Important Liberal Democrat Other
0 Important Ideology_Other Independent Other
100 Important Conservative Republican Male
DemocraticThermometer.model %>% gvlma()
## 
## Call:
## lm(formula = DemocraticThermometer ~ ., data = .)
## 
## Coefficients:
##          (Intercept)     ReligionImportant       IdeologyLiberal  
##              37.8463                1.6607                6.5310  
##     IdeologyModerate  IdeologyConservative       PartyIDDemocrat  
##               0.9419              -12.5531               31.0464  
##    PartyIDRepublican    PartyIDIndependent            GenderMale  
##              -8.9451                5.6356                0.5931  
##         GenderFemale  
##               2.4612  
## 
## 
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance =  0.05 
## 
## Call:
##  gvlma(x = .) 
## 
##                     Value   p-value                   Decision
## Global Stat        40.401 3.576e-08 Assumptions NOT satisfied!
## Skewness           18.067 2.132e-05 Assumptions NOT satisfied!
## Kurtosis            7.975 4.742e-03 Assumptions NOT satisfied!
## Link Function      11.327 7.638e-04 Assumptions NOT satisfied!
## Heteroscedasticity  3.032 8.166e-02    Assumptions acceptable.
DemocraticThermometer.model %>% plot(., which = c(1:6)) # help(plot.lm)

Republican Thermometer

anes2016 %>%
  select(-DemocraticThermometer) %>%
  slice(1429, 1548, 1636, 2194, 3234, 3506) %>%
  kable(caption = "Observations to Notice")
Observations to Notice
RepublicanThermometer Religion Ideology PartyID Gender
0 Important Ideology_Other Independent Other
100 Important Liberal Democrat Female
0 Important Conservative Republican Female
70 Important Conservative Independent Other
100 Not important Ideology_Other Democrat Male
0 Important Ideology_Other Independent Other
RepublicanThermometer.model %>% gvlma()
## 
## Call:
## lm(formula = RepublicanThermometer ~ ., data = .)
## 
## Coefficients:
##          (Intercept)     ReligionImportant       IdeologyLiberal  
##               15.342                 4.698               -12.296  
##     IdeologyModerate  IdeologyConservative       PartyIDDemocrat  
##               -1.640                 2.778                -6.497  
##    PartyIDRepublican    PartyIDIndependent            GenderMale  
##               25.891                 6.521                19.361  
##         GenderFemale  
##               20.864  
## 
## 
## ASSESSMENT OF THE LINEAR MODEL ASSUMPTIONS
## USING THE GLOBAL TEST ON 4 DEGREES-OF-FREEDOM:
## Level of Significance =  0.05 
## 
## Call:
##  gvlma(x = .) 
## 
##                      Value   p-value                   Decision
## Global Stat        19.8185 0.0005423 Assumptions NOT satisfied!
## Skewness           13.4486 0.0002452 Assumptions NOT satisfied!
## Kurtosis            0.9253 0.3360766    Assumptions acceptable.
## Link Function       0.9979 0.3178279    Assumptions acceptable.
## Heteroscedasticity  4.4468 0.0349676 Assumptions NOT satisfied!
RepublicanThermometer.model %>% plot(., which = c(1:6))

Compare Models

DemocraticThermometer.coef %>% 
  full_join(RepublicanThermometer.coef) %>%
  kable()
## Joining, by = "term"
term Democratic_Estimate Democratic_P_Value Republican_Estimate Republican_P_Value
intercept 37.846 0.000 15.342 0.026
Religion: Important 1.661 0.024 4.698 0.000
Ideology: Liberal 6.531 0.000 -12.296 0.000
Ideology: Moderate 0.942 0.355 -1.640 0.107
Ideology: Conservative -12.553 0.000 2.778 0.006
PartyID: Democrat 31.046 0.000 -6.497 0.000
PartyID: Republican -8.945 0.000 25.891 0.000
PartyID: Independent 5.636 0.001 6.521 0.000
Gender: Male 0.593 0.929 19.361 0.004
Gender: Female 2.461 0.713 20.864 0.002
bind_rows(DemocraticThermometer.summary,
          RepublicanThermometer.summary,
          .id = "Model"
          ) %>%
  mutate(Model = recode(Model, `1` = "Democratic", `2` = "Republican")) %>%
  kable()
Model r_squared adj_r_squared mse rmse sigma statistic p_value df nobs
Democratic 0.508 0.507 444.5345 21.08399 21.110 467.554 0 9 4086
Republican 0.407 0.405 443.9693 21.07058 21.096 310.264 0 9 4086