require(ggthemes)
library(tidyverse)
library(magrittr)
library(tidyr)
library(dplyr)
library(lubridate)
library(ggplot2)
library(fpp2)   
library(forecast)
library(ggpubr)
library(boot)

A Simple Linear Regression Model

# Read a comma-separated text file chosen interactively via file.choose().
# The literal "NA", a single space, and the empty string are all read as NA;
# leading/trailing white space is stripped from unquoted character fields.
# NOTE(review): an earlier comment called this file "mtcars.txt", but the
# preview below shows the columns State, Lat, Mort, Ocean and Long (the skin
# cancer data discussed in the conclusion) -- confirm the intended file.
my_data <- read.table(
  file.choose(),
  header = TRUE,
  sep = ",",
  dec = ".",
  na.strings = c("NA", " ", ""),
  strip.white = TRUE,
  stringsAsFactors = FALSE
)
# Preview the first six rows
head(my_data)
##         State  Lat Mort Ocean  Long
## 1     Alabama 33.0  219     1  87.0
## 2     Arizona 34.5  160     0 112.0
## 3    Arkansas 35.0  170     0  92.5
## 4  California 37.5  182     1 119.5
## 5    Colorado 39.0  149     0 105.5
## 6 Connecticut 41.8  159     1  72.8

Including Plots

# Pearson correlation between latitude (Lat) and mortality (Mort)
cor(my_data$Lat, my_data$Mort)
## [1] -0.8245178
# Fit Mort as a function of Lat and compute the analysis of variance
res.aov <- aov(Mort ~ Lat, data = my_data)
# Summary of the analysis
summary(res.aov)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Lat          1  36464   36464    99.8 3.31e-13 ***
## Residuals   47  17173     365                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Intercept and slope of the fitted line; these are the numbers a linear
# prediction equation for Mort from Lat is built on
coef(res.aov)
# 1. Homogeneity of variances: residuals vs. fitted values
plot(res.aov, which = 1)

# 2. Normality: normal Q-Q plot of the residuals
plot(res.aov, which = 2)


Summary and Conclusion:


- The skin cancer data is a good candidate for a simple linear regression model. The predictor (latitude) and the response variable (mortality) are strongly negatively correlated (r = -0.82). The statistical diagnostics show that the conditions for a simple linear regression are met: the relationship is linear, the variance of the residuals is homogeneous, and the slope is statistically significant. Further analysis also showed that the residuals are approximately normal, so even for the purist no further data transformation is needed. In this case, we can say that the linear regression formula is as follows:

- Therefore, the linear equation y = -6*x + 389.2, where x is the latitude and y is the predicted mortality, can be used for prediction.