# Challenge 1: Limited Resources
# Read the address data from the course repository.
addy <- read.csv(
  "https://raw.githubusercontent.com/prlitics/Election-Data-Science-Fall-2020/master/Data/wk12addr.csv"
)

# Partition the addresses into 4 groups with k-means, using columns 2-3 as
# the clustering features (presumably the longitude/latitude columns that are
# plotted below -- confirm against the CSV).
# k-means begins from randomly chosen centres, so the seed is fixed to make
# the cluster assignment reproducible, and `nstart` tries several random
# starts and keeps the best solution instead of relying on a single draw.
set.seed(131313)
clusters <- kmeans(addy[, 2:3], centers = 4, iter.max = 25, nstart = 25)

# Attach each row's cluster label to the original data as column `vec`.
vec <- clusters[["cluster"]]
addy_1 <- cbind(addy, vec)

# Map of the addresses, coloured by assigned cluster.
ggplot(addy_1) +
  geom_point(aes(x = longitude, y = latitude, color = as.factor(vec))) +
  theme_void()

# Challenge 2: A Better Turnout Model
# Load the 2016 time-series data set and keep it under a shorter name.
data("timeseries_2016")
anes16 <- timeseries_2016
rm(timeseries_2016)

# Replace negative values with NA (negative values are presumably the
# survey's missing/inapplicable codes -- confirm against the codebook).
clean <- function(x) {
  ifelse(x < 0, NA, x)
}

# Build the modelling data set.
# Features: Age, Income, PID, Ideology, Gender, Education, Race, Religion and
# Contact; the outcome is Vote.
# The columns are selected *before* cleaning so that clean() only runs on the
# ten variables that are kept rather than on every column of the survey; the
# resulting values are the same because clean() works column by column.
anes16_clean <- anes16 %>%
  select(
    PID = V161158x,
    Gender = V161342,
    Income = V161361x,
    Religion = V161265x,
    Contact = V162007,
    Education = V161270,
    Age = V161267,
    Ideology = V161126,
    Race = V161310x,
    Vote = V162031x
  ) %>%
  mutate(across(everything(), clean)) %>%
  # Everything except Age is treated as categorical.
  mutate(
    across(
      c(PID, Gender, Income, Religion, Education, Ideology, Race, Contact, Vote),
      as.factor
    )
  ) %>%
  # Keep complete cases only.
  drop_na()

# Give the outcome levels descriptive labels that are also syntactically
# valid R names.
anes16_clean$Vote <- plyr::revalue(
  anes16_clean$Vote,
  c("1" = "Vote", "0" = "Not_Vote")
)
# Train/test partition ----
# Seed the RNG so the 70/30 split can be reproduced.
set.seed(131313)
split1 <- initial_split(anes16_clean, prop = 0.7)
Train_Data <- training(split1)
Test_Data <- testing(split1)

# Feature engineering ----
# The recipe is assembled one step at a time: Vote is the outcome and every
# other column is a predictor.
anes_recipe <- recipe(Vote ~ ., data = Train_Data)
# Expand the categorical predictors into dummy (indicator) columns.
anes_recipe <- step_dummy(
  anes_recipe,
  PID, Gender, Income, Religion, Education, Ideology, Race, Contact
)
# Centre Age, then scale it.
anes_recipe <- step_center(anes_recipe, Age)
anes_recipe <- step_scale(anes_recipe, Age)
# Estimate the preprocessing parameters from the training rows.
anes_recipe <- prep(anes_recipe, training = Train_Data)

# Apply the prepared recipe ----
# Both partitions are transformed with the same prepared recipe.
baked_train <- bake(anes_recipe, new_data = Train_Data)
baked_test <- bake(anes_recipe, new_data = Test_Data)
# Seed the RNG so the model tuning below is reproducible.
set.seed(131313)

# Resampling scheme: 10-fold cross-validation repeated 5 times. Class
# probabilities are requested because twoClassSummary reports ROC alongside
# sensitivity and specificity.
cv <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# Hyperparameter grid: odd values of k from 15 to 31.
hyper_grid <- expand.grid(
  k = seq(15, 31, by = 2)
)

# Fit the k-nearest-neighbours model for every k in the grid and select on
# the resampled ROC metric.
knn_grid <- train(
  Vote ~ .,
  data = baked_train,
  method = "knn",
  trControl = cv,
  tuneGrid = hyper_grid,
  metric = "ROC"
)

# Plot the resampled performance across the values of k.
ggplot(knn_grid)

# Resampled ROC, sensitivity and specificity (with standard deviations)
# for each k.
knn_grid$results
## k ROC Sens Spec ROCSD SensSD SpecSD
## 1 15 0.6804023 0.021241379 0.9959035 0.05437139 0.022842889 0.005170448
## 2 17 0.6876078 0.013678161 0.9968736 0.05044798 0.019502592 0.004368591
## 3 19 0.6922816 0.010965517 0.9980599 0.04736753 0.017587748 0.003033282
## 4 21 0.6932337 0.010298851 0.9984911 0.04482077 0.019954570 0.002674627
## 5 23 0.6905439 0.005448276 0.9992461 0.04273380 0.014297619 0.001887378
## 6 25 0.6906818 0.003402299 0.9991380 0.04450305 0.010312091 0.002274386
## 7 27 0.6919629 0.003425287 0.9994624 0.04437681 0.012502118 0.001958149
## 8 29 0.6911869 0.002068966 0.9993543 0.04499333 0.008272343 0.001766437
## 9 31 0.6938791 0.002712644 0.9993537 0.04746736 0.009293866 0.002078104