Challenge 1: Limited Resources

# Read the address data, partition the addresses into four clusters with
# k-means, and plot the assignment.
addy <- read.csv(
  "https://raw.githubusercontent.com/prlitics/Election-Data-Science-Fall-2020/master/Data/wk12addr.csv"
)

# k-means starts from randomly chosen centres, so fix the seed to make the
# cluster assignment reproducible, and try several random starts (`nstart`)
# so that one unlucky initialisation does not determine the result.
# NOTE(review): columns 2:3 are presumably the longitude/latitude columns
# plotted below -- confirm against the CSV header.
set.seed(131313)
clusters <- kmeans(addy[, 2:3], centers = 4, iter.max = 25, nstart = 25)

# Cluster label assigned to each row of `addy`.
vec <- clusters[["cluster"]]

# Attach the labels to the address data as a column named `vec`.
addy_1 <- cbind(addy, vec)

# Plot every address at its coordinates, coloured by its cluster.
ggplot(addy_1) +
  geom_point(aes(x = longitude, y = latitude, color = as.factor(vec))) +
  theme_void()

Challenge 2: A Better Turnout Model

# Load the `timeseries_2016` dataset (presumably the ANES 2016 time series
# from an attached data package -- confirm), keep it under the shorter name
# `anes16`, and remove the original binding.
data(list = "timeseries_2016")
anes16 <- timeseries_2016
rm(list = "timeseries_2016")

# Recode negative values to NA and leave all other values untouched.
# NOTE(review): negative codes presumably mark missing or inapplicable
# responses in this dataset -- confirm against the codebook.
clean <- function(x) {
  keep <- x >= 0
  ifelse(keep, x, NA)
}

# Build the modelling data set: keep only the features of interest under
# readable names, recode negative codes to NA, convert the categorical items
# to factors, and keep complete cases only.
anes16_clean <- anes16 %>%
  # Select first so `clean()` only runs on the ten columns that are kept,
  # not on every column of the survey file.
  select(
    PID = V161158x, Gender = V161342, Income = V161361x,
    Religion = V161265x, Contact = V162007, Education = V161270,
    Age = V161267, Ideology = V161126, Race = V161310x, Vote = V162031x
  ) %>%
  mutate(across(everything(), clean)) %>%
  # Age stays numeric; every other column is converted to a factor.
  mutate(across(
    c(PID, Gender, Income, Religion, Education, Ideology, Race, Contact, Vote),
    as.factor
  )) %>%
  drop_na()

# Relabel the outcome levels "1"/"0" as "Vote"/"Not_Vote".
anes16_clean$Vote <- plyr::revalue(
  anes16_clean$Vote,
  c("1" = "Vote", "0" = "Not_Vote")
)

# Train/test partition ----
# Seed the RNG so the 70/30 split is reproducible.
set.seed(131313)
split1 <- initial_split(anes16_clean, prop = 0.7)
Train_Data <- training(split1)
Test_Data <- testing(split1)

# Feature engineering ----
# Build the preprocessing specification one step at a time: dummy-code the
# categorical predictors, then centre and scale Age.
anes_recipe <- recipe(Vote ~ ., data = Train_Data)
anes_recipe <- step_dummy(
  anes_recipe,
  PID, Gender, Income, Religion, Education, Ideology, Race, Contact
)
anes_recipe <- step_center(anes_recipe, Age)
anes_recipe <- step_scale(anes_recipe, Age)

# Estimate the preprocessing parameters from the training rows.
anes_recipe <- prep(anes_recipe, training = Train_Data)

# Apply the trained recipe to both partitions.
baked_train <- bake(anes_recipe, new_data = Train_Data)
baked_test <- bake(anes_recipe, new_data = Test_Data)



# Seed the RNG so the cross-validation resamples are reproducible.
set.seed(131313)

# 10-fold cross-validation repeated 5 times. Class probabilities are
# requested because `twoClassSummary` reports ROC alongside sensitivity and
# specificity.
cv <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# Candidate neighbourhood sizes: the odd values of k from 15 to 31.
hyper_grid <- expand.grid(
  k = seq(15, 31, by = 2)
)

# Fit a kNN classifier for every k in the grid, using ROC as the metric for
# choosing among them.
knn_grid <- train(
  Vote ~ .,
  data = baked_train,
  method = "knn",
  trControl = cv,
  tuneGrid = hyper_grid,
  metric = "ROC"
)

# Plot the resampled performance across the tuning grid.
ggplot(knn_grid)

# Resampled performance for every k in the grid.
knn_grid$results
##    k       ROC        Sens      Spec      ROCSD      SensSD      SpecSD
## 1 15 0.6804023 0.021241379 0.9959035 0.05437139 0.022842889 0.005170448
## 2 17 0.6876078 0.013678161 0.9968736 0.05044798 0.019502592 0.004368591
## 3 19 0.6922816 0.010965517 0.9980599 0.04736753 0.017587748 0.003033282
## 4 21 0.6932337 0.010298851 0.9984911 0.04482077 0.019954570 0.002674627
## 5 23 0.6905439 0.005448276 0.9992461 0.04273380 0.014297619 0.001887378
## 6 25 0.6906818 0.003402299 0.9991380 0.04450305 0.010312091 0.002274386
## 7 27 0.6919629 0.003425287 0.9994624 0.04437681 0.012502118 0.001958149
## 8 29 0.6911869 0.002068966 0.9993543 0.04499333 0.008272343 0.001766437
## 9 31 0.6938791 0.002712644 0.9993537 0.04746736 0.009293866 0.002078104

Challenge 3: Au Naturale (or is it?)

  1. Not natural; selectivity issue (COVID disproportionately affects the older population, so precincts in areas with more elderly residents were more likely to shut down quickly).
  2. Natural; hypothetically, the thing that causes the variation (the hurricane) is random and beyond a person’s control.
  3. Natural; exploits an arbitrary cutoff.