library(openintro)
library(datarium)
# The openintro and datarium packages have datasets that we will use
library(dplyr)
library(ggplot2)
# These are packages for manipulating and plotting data
library(caret)
# for looking at the performance of models
# 1. Mammals ----
# Load the mammals data set that ships with the openintro package.
data("mammals", package = "openintro")

# Keep only rows with a recorded TotalSleep, then add base-10 logarithms of
# BrainWt and BodyWt as new columns.
mammals <- mammals %>%
  filter(!is.na(TotalSleep)) %>%
  mutate(
    logBrainWt = log10(BrainWt),
    logBodyWt = log10(BodyWt)
  )

# Ordinary least-squares fit of TotalSleep on the two log weights plus the
# Predation, Exposure and Danger columns; the fit is stored in `m` and its
# summary is printed.
m <- lm(
  formula = TotalSleep ~ logBrainWt + logBodyWt + Predation + Exposure + Danger,
  data = mammals
)
summary(m)
# 5-fold Cross-Validation ----
# Resampling scheme: a single pass of 5-fold cross-validation.
# method = "cv" requests exactly that; "repeatedcv" is meant for runs that
# also set `repeats`, and without it caret silently falls back to one repeat.
fitControl <- trainControl(
  method = "cv",
  number = 5
)

# Fit the full linear model through caret so that its out-of-sample
# performance is estimated with the resampling scheme defined above.
model.complex <- train(
  TotalSleep ~ logBrainWt + logBodyWt + Predation + Exposure + Danger,
  data = mammals,
  method = "lm",
  trControl = fitControl
)

# Print the resampling summary for the fitted model.
model.complex
# 5-fold Cross-Validation, repeated 10 times ----
# Resampling scheme: 5-fold cross-validation, repeated 10 times.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 10
)

# Refit the full linear model through caret using the repeated-CV scheme.
model.complex <- train(
  TotalSleep ~ logBrainWt + logBodyWt + Predation + Exposure + Danger,
  data = mammals,
  method = "lm",
  trControl = fitControl
)

# Print the resampling summary for the fitted model.
model.complex
# Leave-one-out Cross-Validation ----
# Resampling scheme: leave-one-out cross-validation.
fitControl <- trainControl(method = "LOOCV")

# Full model: both log weights plus Predation, Exposure and Danger.
model.complex <- train(
  TotalSleep ~ logBrainWt + logBodyWt + Predation + Exposure + Danger,
  data = mammals,
  method = "lm",
  trControl = fitControl
)
model.complex

# Reduced model: drops logBodyWt and Exposure, evaluated with the same
# leave-one-out scheme so the two printed summaries can be compared.
model.simple <- train(
  TotalSleep ~ logBrainWt + Predation + Danger,
  data = mammals,
  method = "lm",
  trControl = fitControl
)
model.simple
# 2. More Ray Fair ----
# Download the bread-and-peace CSV (first row holds the column names).
presdata <- read.csv(
  "https://raw.githubusercontent.com/jfcross4/data/master/bread_and_peace.csv",
  header = TRUE
)

# IV is VP where I equals 1 and 100 - VP otherwise; it is plotted below as
# "Incumbent Vote Share".
# NOTE(review): presumably I flags which party is the incumbent and VP is
# that party's vote percentage -- confirm against the data dictionary.
presdata <- presdata %>%
  mutate(IV = ifelse(I == 1, VP, 100 - VP))

# Scatter of IV against G, each point drawn as its `t` label, with a fitted
# straight line overlaid.
ggplot(presdata, aes(x = G, y = IV, label = t)) +
  geom_text() +
  xlab("Growth rate in GDP per capita (G)") +
  ylab("Incumbent Vote Share") +
  geom_smooth(method = "lm")

# Simple linear regression of IV on G; the fit replaces any earlier `m`.
m <- lm(formula = IV ~ G, data = presdata)
summary(m)

# In-sample predictions and their error: RMSE and MAE are computed on the
# same rows the model was fitted to.
presdata$predIV <- predict(m, newdata = presdata)
summarize(presdata, RMSE(IV, predIV), MAE(IV, predIV))
# Resampling scheme: leave-one-out cross-validation.
fitControl <- trainControl(method = "LOOCV")

# Linear model of IV on G, evaluated with leave-one-out cross-validation.
model.G <- train(
  IV ~ G,
  data = presdata,
  method = "lm",
  trControl = fitControl
)
model.G

# Comparison model whose only predictor is a constant column `nothing`
# (always 1), added to the data on the fly.
# NOTE(review): presumably this serves as a no-information baseline for
# judging model.G -- confirm that is the intent.
model.nothing <- train(
  IV ~ nothing,
  data = mutate(presdata, nothing = 1),
  method = "lm",
  trControl = fitControl
)
model.nothing