library(openintro)
library(datarium)
# The openintro and datarium packages have datasets that we will use

library(dplyr)
library(ggplot2)
# These are packages for manipulating and plotting data

library(caret)
# for looking at the performance of models

# 1. Mammals

# Load the `mammals` data set that ships with openintro.
data("mammals", package = "openintro")

# Add base-10 logs of BrainWt and BodyWt, then keep only the rows where
# TotalSleep (the response modelled below) is recorded.
mammals <- mammals %>%
  mutate(
    logBrainWt = log10(BrainWt),
    logBodyWt = log10(BodyWt)
  ) %>%
  filter(!is.na(TotalSleep))

# Ordinary least-squares fit of TotalSleep on all five predictors,
# evaluated on the same rows it was trained on.
m <- lm(
  TotalSleep ~ logBrainWt + logBodyWt + Predation + Exposure + Danger,
  data = mammals
)
summary(m)

# 5-fold Cross-Validation

# Plain 5-fold cross-validation: one pass over five folds.
# method = "cv" states this directly; "repeatedcv" is meant for the repeated
# variant, where `repeats` is supplied explicitly.
fitControl <- trainControl(method = "cv", number = 5)

# Fold assignment is random, so fix the seed to make the reported
# resampling metrics reproducible from run to run.
set.seed(1)
model.complex <- train(
  TotalSleep ~ logBrainWt + logBodyWt + Predation + Exposure + Danger,
  data = mammals,
  method = "lm",
  trControl = fitControl
)

# Print the cross-validated performance summary.
model.complex

# 5-fold Cross-Validation, repeated 10 times

# 5-fold cross-validation repeated 10 times; the metrics are averaged over
# all resamples, which steadies the estimate relative to a single pass.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 10
)

# Fold assignment is random, so fix the seed to make the reported
# resampling metrics reproducible from run to run.
set.seed(1)
model.complex <- train(
  TotalSleep ~ logBrainWt + logBodyWt + Predation + Exposure + Danger,
  data = mammals,
  method = "lm",
  trControl = fitControl
)

# Print the cross-validated performance summary.
model.complex

# Leave-one-out Cross-Validation

# Leave-one-out cross-validation: the same resampling scheme is shared by
# both models below so their metrics are directly comparable.
fitControl <- trainControl(method = "LOOCV")

# Full model: all five predictors.
model.complex <- train(
  TotalSleep ~ logBrainWt + logBodyWt + Predation + Exposure + Danger,
  data = mammals,
  method = "lm",
  trControl = fitControl
)

model.complex

# Reduced model: drops logBodyWt and Exposure.
model.simple <- train(
  TotalSleep ~ logBrainWt + Predation + Danger,
  data = mammals,
  method = "lm",
  trControl = fitControl
)

model.simple

# 2. More Ray Fair

# Read the election data set directly from GitHub.
presdata <- read.csv(
  "https://raw.githubusercontent.com/jfcross4/data/master/bread_and_peace.csv",
  header = TRUE
)

# IV equals VP where I == 1 and 100 - VP everywhere else.
# NOTE(review): presumably VP is one party's vote share and I marks that
# party as the incumbent -- confirm against the data dictionary.
presdata <- presdata %>%
  mutate(IV = ifelse(I == 1, VP, 100 - VP))

# Plot IV against G, drawing each observation as its `t` label, and overlay
# a least-squares line.
presdata %>%
  ggplot(aes(G, IV, label = t)) +
  geom_text() +
  xlab("Growth rate in GDP per capita (G)") +
  ylab("Incumbent Vote Share") +
  geom_smooth(method = "lm")

# Simple linear regression of IV on G.
m <- lm(IV ~ G, data = presdata)
summary(m)

# Fitted values on the training rows themselves.
presdata$predIV <- predict(m, newdata = presdata)

# In-sample error of the fit. caret's RMSE() and MAE() take (pred, obs);
# the arguments are named so the roles are unambiguous, and the output
# columns are named so the result is easy to read and reuse.
presdata %>%
  summarize(
    rmse_in_sample = RMSE(pred = predIV, obs = IV),
    mae_in_sample = MAE(pred = predIV, obs = IV)
  )
# Leave-one-out cross-validation for the election models.
fitControl <- trainControl(method = "LOOCV")

# Out-of-sample performance of the one-predictor model IV ~ G.
model.G <- train(
  IV ~ G,
  data = presdata,
  method = "lm",
  trControl = fitControl
)

model.G


# Model whose only predictor, `nothing`, is a constant column of 1s added
# on the fly, so it carries no information about IV.
# NOTE(review): presumably intended as a no-predictor baseline to compare
# against the IV ~ G model -- confirm.
model.nothing <- train(
  IV ~ nothing,
  data = presdata %>% mutate(nothing = 1),
  method = "lm",
  trControl = fitControl
)

model.nothing