Data
Load
https://electionstudies.org/data-center/2016-time-series-study/
anes2016 <-
read_dta("G:/My Drive/homework/Andrea P/Project 2016/anes_timeseries_2016.dta",
col_select = c(V161095, V161096, V161241, V161126, V161155, V161342))
anes2016 %<>%
mutate(DemocraticThermometer = V161095, # Dependent Variable
RepublicanThermometer = V161096, # Dependent Variable
Religion = V161241,
Ideology = V161126,
PartyID = V161155,
Gender = V161342,
.keep = "unused"
)
Tables
# Dropped negative levels
anes2016 %>%
pivot_longer(names_to = "Variable",
values_to = "DroppedLevel",
cols = everything()
) %>%
filter(DroppedLevel < 0) %>%
table() %>%
addmargins() %>%
kable(caption = "Count of Negative Levels")
Count of Negative Levels
DemocraticThermometer |
49 |
6 |
15 |
0 |
0 |
70 |
Gender |
0 |
0 |
0 |
41 |
0 |
41 |
Ideology |
0 |
0 |
0 |
18 |
5 |
23 |
PartyID |
0 |
0 |
0 |
15 |
10 |
25 |
Religion |
0 |
0 |
0 |
21 |
6 |
27 |
RepublicanThermometer |
64 |
5 |
17 |
0 |
0 |
86 |
Sum |
113 |
11 |
32 |
95 |
21 |
272 |
# All levels (thermometers are mixed continuous and categorical)
options(knitr.kable.NA = '')
anes2016 %>%
select(-DemocraticThermometer, -RepublicanThermometer) %>%
pivot_longer(names_to = "Variable",
values_to = "Level",
cols = everything()
) ->
df
anes2016 %>%
select(DemocraticThermometer, RepublicanThermometer) %>%
pivot_longer(names_to = "Variable",
values_to = "Level",
cols = everything()
) %>%
filter(Level < 0) %>%
bind_rows(df) %>%
group_by(Variable, Level) %>%
summarize(Percentage = n() / nrow(anes2016) * 100) %>% # divisor = # category obs
pivot_wider(names_from = Level, values_from = Percentage) %>%
kable(digits = 1, caption = "Level % of Category Observations")
Level % of Category Observations
DemocraticThermometer |
1.1 |
0.1 |
0.4 |
|
|
|
|
|
|
|
|
|
|
|
Gender |
|
|
|
1.0 |
46.5 |
52.2 |
0.3 |
|
|
|
|
|
|
|
Ideology |
|
|
|
0.4 |
3.4 |
11.9 |
8.9 |
0.1 |
20.9 |
11.9 |
16.5 |
3.9 |
22.1 |
|
PartyID |
|
|
|
0.4 |
34.0 |
28.8 |
32.0 |
0.2 |
|
3.5 |
|
|
|
1.1 |
Religion |
|
|
|
0.5 |
65.1 |
34.2 |
|
0.1 |
|
|
|
|
|
|
RepublicanThermometer |
1.5 |
0.1 |
0.4 |
|
|
|
|
|
|
|
|
|
|
|
rm(df)
Factor Level Transformations
anes2016 %<>%
filter(if_all(.cols = everything(),
.fns = ~ .x >= 0) # drop all negative codes (levels)
) %>%
mutate(across(.cols = c(-DemocraticThermometer, -RepublicanThermometer),
.fns = as_factor), # categorical data
across(.cols = c(-DemocraticThermometer, -RepublicanThermometer),
.fns = fct_drop), # drop empty levels
across(.cols = c(-DemocraticThermometer, -RepublicanThermometer),
.fns = ~ fct_relabel(.x, ~ level_tidy(.x))), # remove level numbering
PartyID = fct_other(PartyID, # combine levels
drop = c("No preference (FTF ONLY)",
"Other party SPECIFY"),
other_level = "PartyID_Other"),
PartyID = fct_relevel(PartyID, "PartyID_Other", after = 0), # reference level
Ideology = fct_collapse(Ideology, # collapse levels
Liberal = c("Extremely liberal",
"Liberal",
"Slightly liberal"),
Moderate = "Moderate, middle of the road",
Conservative = c("Slightly conservative",
"Conservative",
"Extremely conservative"),
Ideology_Other =
"Haven't thought much about this (FTF ONLY: DO NOT PROBE)"),
Ideology = fct_relevel(Ideology, "Ideology_Other", after = 0), # ref level
Gender = fct_relevel(Gender, "Other", after = 0), # reference level
Religion = fct_relevel(Religion, # reference level
"Not important",
after = 0
)
)
# Possible additions:
# Income: V161361x
# Location: V163001b, V163002, V163003, V163004
# Removed: V161264x (religion summary) replaced by V161241
# Removed: V161158x (Party ID) replaced by V161155
# Dropped: V161137 (income gap), V161019 (insignificant p-values for all levels)
Code Book
# V161095 Feeling Thermometer: Democratic Party
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=209
# V161096 PRE: Feeling Thermometer: Republican Party
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=210
# V161241 PRE: Is religion important part of R life
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=422
# V161126 PRE: 7pt scale Liberal conservative self-placement
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=243
# V161137 PRE: Income gap today more or less than 20 years ago
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=261
# V161155: PRE: Party ID: Does R think of self as Dem, Rep, Ind or what
# https://electionstudies.org/wp-content/uploads/2018/12/anes_timeseries_2016_userguidecodebook.pdf#page=292
# https://stackoverflow.com/a/54282131
data.frame(
"name" = names(anes2016),
"label" = sapply(anes2016,
function(x) var_label(x)
) %>% as.character()
) %>%
kable()
DemocraticThermometer |
PRE: Feeling Thermometer: Democratic Party |
RepublicanThermometer |
PRE: Feeling Thermometer: Republican Party |
Religion |
PRE: Is religion important part of R life |
Ideology |
PRE: 7pt scale Liberal conservative self-placement |
PartyID |
PRE: Party ID: Does R think of self as Dem, Rep, Ind or what |
Gender |
PRE FTF CASI / WEB: R self-identified gender |
EDA
anes2016 %>% summary()
## DemocraticThermometer RepublicanThermometer Religion
## Min. : 0.00 Min. : 0.00 Not important:1411
## 1st Qu.: 22.00 1st Qu.: 17.00 Important :2675
## Median : 50.00 Median : 50.00
## Mean : 47.96 Mean : 43.56
## 3rd Qu.: 70.00 3rd Qu.: 60.00
## Max. :100.00 Max. :100.00
## Ideology PartyID Gender
## Ideology_Other: 882 PartyID_Other: 182 Other : 10
## Liberal :1003 Democrat :1398 Male :1926
## Moderate : 859 Republican :1206 Female:2150
## Conservative :1342 Independent :1300
##
##
anes2016 %>% datatable(options = list(pageLength = 5))
anes2016 %>% ggpairs()

anes2016 %>%
ggplot(aes(y = DemocraticThermometer, fill = Gender, color = Religion)) +
geom_boxplot(varwidth = TRUE) +
facet_grid(rows = vars(Ideology), cols = vars(PartyID)) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

anes2016 %>%
ggplot(aes(y = DemocraticThermometer, fill = Religion, color = Gender)) +
geom_boxplot(varwidth = TRUE) +
facet_grid(rows = vars(Ideology), cols = vars(PartyID)) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

anes2016 %>%
ggplot(aes(x = DemocraticThermometer,
y = RepublicanThermometer,
color = Gender,
shape = Gender)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_grid(rows = vars(Ideology), cols = vars(PartyID))

anes2016 %>%
ggplot(aes(x = DemocraticThermometer,
y = RepublicanThermometer,
shape = Religion,
color = Religion)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_grid(rows = vars(Ideology), cols = vars(PartyID))

anes2016 %>%
select(DemocraticThermometer, RepublicanThermometer) %>%
pivot_longer(names_to = "Variable",
values_to = "Value",
cols = everything()) %>%
group_by(Variable) %>%
ggplot(aes(sample = (Value))) +
geom_qq() +
geom_qq_line(aes(color = "red")) +
facet_wrap(~Variable) +
ggtitle("QQ-Plot of Response (Dependent) Variables") +
theme(plot.title = element_text(hjust = 0.5))

anes2016 %>%
select(DemocraticThermometer, RepublicanThermometer) %>%
pivot_longer(names_to = "Variable",
values_to = "Value",
cols = everything()) %>%
group_by(Variable) %>%
ggplot(aes(y = Value)) +
geom_boxplot() +
facet_wrap(~Variable) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

Linear Models
Democratic Thermometer
DemocraticThermometer.model <-
anes2016 %>%
select(-RepublicanThermometer) %>%
lm(DemocraticThermometer ~ ., data = .)
DemocraticThermometer.coef <-
DemocraticThermometer.model %>%
get_regression_table() %>%
select(term, estimate, p_value) %>%
mutate(Democratic_Estimate = estimate,
Democratic_P_Value = p_value,
.keep = "unused")
DemocraticThermometer.summary <-
DemocraticThermometer.model %>%
get_regression_summaries()
# summary(model, correlation = TRUE))
Republican Thermometer
RepublicanThermometer.model <-
anes2016 %>%
select(-DemocraticThermometer) %>%
lm(RepublicanThermometer ~ ., data = .)
RepublicanThermometer.coef <-
RepublicanThermometer.model %>%
get_regression_table() %>%
select(term, estimate, p_value) %>%
mutate(Republican_Estimate = estimate,
Republican_P_Value = p_value,
.keep = "unused")
RepublicanThermometer.summary <-
RepublicanThermometer.model %>%
get_regression_summaries()
Compare Models
DemocraticThermometer.coef %>%
full_join(RepublicanThermometer.coef) %>%
kable()
## Joining, by = "term"
intercept |
37.846 |
0.000 |
15.342 |
0.026 |
Religion: Important |
1.661 |
0.024 |
4.698 |
0.000 |
Ideology: Liberal |
6.531 |
0.000 |
-12.296 |
0.000 |
Ideology: Moderate |
0.942 |
0.355 |
-1.640 |
0.107 |
Ideology: Conservative |
-12.553 |
0.000 |
2.778 |
0.006 |
PartyID: Democrat |
31.046 |
0.000 |
-6.497 |
0.000 |
PartyID: Republican |
-8.945 |
0.000 |
25.891 |
0.000 |
PartyID: Independent |
5.636 |
0.001 |
6.521 |
0.000 |
Gender: Male |
0.593 |
0.929 |
19.361 |
0.004 |
Gender: Female |
2.461 |
0.713 |
20.864 |
0.002 |
bind_rows(DemocraticThermometer.summary,
RepublicanThermometer.summary,
.id = "Model"
) %>%
mutate(Model = recode(Model, `1` = "Democratic", `2` = "Republican")) %>%
kable()
Democratic |
0.508 |
0.507 |
444.5345 |
21.08399 |
21.110 |
467.554 |
0 |
9 |
4086 |
Republican |
0.407 |
0.405 |
443.9693 |
21.07058 |
21.096 |
310.264 |
0 |
9 |
4086 |