GSS2006 Multinomial Model

library(tidyverse)
library(haven)
library(nnet)
library(wesanderson)
library(scales)
library(broom)
library(knitr)
data <- read_sav("C:/Users/LuisAlberto/Downloads/GSS2006.sav")
data <- data[,c("NATMASS", "AGE", "SEX", "SEI", "REGION", "POLVIEWS")]
data <- na.omit(data)
# 1 y 2
data$POLVIEWS <- as.factor(data$POLVIEWS)
data$POLVIEWS <- factor(data$POLVIEWS, 
                        labels = c("Extremely liberal", "Liberal", "Slightly liberal", "Moderate",
                                   "Slightly conservative", "Conservative", "Extremely conservative"))
data$NATMASS <- as.factor(data$NATMASS)
data$NATMASS <- factor(data$NATMASS,
                       labels = c("Too little", "About right", "Too much"))
data$NATMASS <- relevel(data$NATMASS, ref = "About right")

data <- data %>% mutate(obs_num = 1:n())

# 3
ggplot(data, aes(x = POLVIEWS, fill = NATMASS)) + geom_bar(position = "fill") + 
  theme_minimal() + labs(title = "Appreciation of Mass Transportation expense per Political View", 
                         fill = "Appreciation", x = "Political View", y = "Percentage")

# 4 volveremos la media de ambos cero para que el intercepto nos sirva para entender las caracteristicas
# teniendo la edad y el SEI promedio
data$AGE <- as.integer(data$AGE)
data$SEI <- as.double(data$SEI)

meanAGE <- mean(data$AGE)
meanSEI <- mean(data$SEI)

data$AGE <- data$AGE - meanAGE
data$SEI <- data$SEI - meanSEI

# 5
fit1 <- multinom(NATMASS ~ AGE + SEX + SEI + REGION, data = data)
summary(fit1)
tidy(fit1, exponentiate = F)

# 6 no sé

# 7 incompleto
fit2 <- multinom(NATMASS ~ AGE + SEX + SEI + REGION + POLVIEWS, data = data)
summary(fit2)
tidy(fit2, exponentiate = F)
# Mejora el AIC entonces la variable de POLVIEWS ayuda al modelo a ser mejor

# 8 incompleto
pred.probs <- as_tibble(predict(fit2, type = "probs")) %>%
  mutate(obs_num = 1:n())
residuals <- as_tibble(residuals(fit2)) %>%
  setNames(paste('resid.', names(.), sep = "")) %>%
  mutate(obs_num = 1:n())

aug <- inner_join(data, pred.probs)
aug <- inner_join(aug, residuals)

plot(aug$`About right`, aug$`resid.About right`)

plot(aug$`Too little`, aug$`resid.Too little`)

plot(aug$`Too much`, aug$`resid.Too much`)

#9 y 10 faltan