library(tidyverse)
library(haven)
library(nnet)
library(wesanderson)
library(scales)
library(broom)
library(knitr)
data <- read_sav("C:/Users/LuisAlberto/Downloads/GSS2006.sav")
data <- data[,c("NATMASS", "AGE", "SEX", "SEI", "REGION", "POLVIEWS")]
data <- na.omit(data)
# 1 y 2
data$POLVIEWS <- as.factor(data$POLVIEWS)
data$POLVIEWS <- factor(data$POLVIEWS,
labels = c("Extremely liberal", "Liberal", "Slightly liberal", "Moderate",
"Slightly conservative", "Conservative", "Extremely conservative"))
data$NATMASS <- as.factor(data$NATMASS)
data$NATMASS <- factor(data$NATMASS,
labels = c("Too little", "About right", "Too much"))
data$NATMASS <- relevel(data$NATMASS, ref = "About right")
data <- data %>% mutate(obs_num = 1:n())
# 3
ggplot(data, aes(x = POLVIEWS, fill = NATMASS)) + geom_bar(position = "fill") +
theme_minimal() + labs(title = "Appreciation of Mass Transportation expense per Political View",
fill = "Appreciation", x = "Political View", y = "Percentage")
# 4 volveremos la media de ambos cero para que el intercepto nos sirva para entender las caracteristicas
# teniendo la edad y el SEI promedio
data$AGE <- as.integer(data$AGE)
data$SEI <- as.double(data$SEI)
meanAGE <- mean(data$AGE)
meanSEI <- mean(data$SEI)
data$AGE <- data$AGE - meanAGE
data$SEI <- data$SEI - meanSEI
# 5
fit1 <- multinom(NATMASS ~ AGE + SEX + SEI + REGION, data = data)
summary(fit1)
tidy(fit1, exponentiate = F)
# 6 no sé
# 7 incompleto
fit2 <- multinom(NATMASS ~ AGE + SEX + SEI + REGION + POLVIEWS, data = data)
summary(fit2)
tidy(fit2, exponentiate = F)
# Mejora el AIC entonces la variable de POLVIEWS ayuda al modelo a ser mejor
# 8 incompleto
pred.probs <- as_tibble(predict(fit2, type = "probs")) %>%
mutate(obs_num = 1:n())
residuals <- as_tibble(residuals(fit2)) %>%
setNames(paste('resid.', names(.), sep = "")) %>%
mutate(obs_num = 1:n())
aug <- inner_join(data, pred.probs)
aug <- inner_join(aug, residuals)
plot(aug$`About right`, aug$`resid.About right`)