#ANOVA Workflow
library(ggplot2)
library(plyr)
#data transformation section
# Cube transformation: raises every element of x to the third power.
Cube.Tns <- function(x) {
  cubed <- x^3
  cubed
}
# Square transformation: raises every element of x to the second power.
Square.Tns <- function(x) {
  squared <- x^2
  squared
}
# Identity ("raw") transformation: returns x unchanged, so untransformed data
# can be passed through the same interface as the other *.Tns helpers.
Raw.Tns <- function(x) {
  identity(x)
}
# Square-root transformation: element-wise sqrt() of x.
Sqrt.Tns <- function(x) {
  rooted <- sqrt(x)
  rooted
}
# Log transformation: base-10 logarithm of x after adding a small offset.
#
# Arguments:
#   x      - numeric vector to transform.
#   offset - constant added to x before taking the logarithm, so that zeros
#            in x give a finite result instead of -Inf. Defaults to 0.00001.
#
# Returns a numeric vector the same length as x.
Log.Tns <- function(x, offset = 0.00001) {
  log10(x + offset)
}
# Negative reciprocal-root transformation: returns -1 / sqrt(x) element-wise.
RecipRoot.Tns <- function(x) {
  root <- sqrt(x)
  -1 / root
}
# Negative reciprocal transformation: returns -1 / x element-wise.
Recip.Tns <- function(x) {
  neg_recip <- -1 / x
  neg_recip
}
# Negative inverse-square transformation: returns -1 / x^2 element-wise.
InvSquare.Tns <- function(x) {
  -1 / (x^2)
}

# Load the built-in Old Faithful geyser data set (eruption length and waiting
# time) into the workspace.
data("faithful")
# Quick look at the data. Lines starting with "##" are console output that was
# captured when the script was run; they are not executed.

# First six rows.
head(faithful)
##   eruptions waiting
## 1     3.600      79
## 2     1.800      54
## 3     3.333      74
## 4     2.283      62
## 5     4.533      85
## 6     2.883      55

# Per-column minimum, quartiles, median, mean and maximum.
summary(faithful) #mean, median, min
##    eruptions        waiting
##  Min.   :1.600   Min.   :43.0
##  1st Qu.:2.163   1st Qu.:58.0
##  Median :4.000   Median :76.0
##  Mean   :3.488   Mean   :70.9
##  3rd Qu.:4.454   3rd Qu.:82.0
##  Max.   :5.100   Max.   :96.0

# Structure: 272 observations of two numeric variables.
str(faithful)
## 'data.frame': 272 obs. of 2 variables:
##  $ eruptions: num 3.6 1.8 3.33 2.28 4.53 ...
##  $ waiting  : num 79 54 74 62 85 55 88 85 51 85 ...
# Exploratory plots of the raw data. The figure captions from the rendered
# report are kept as comments so that the script parses.

# Figure: Histogram of eruption time from the faithful dataset.
hist(faithful$eruptions)

# Figure: QQ-Norm plot of eruptions from the faithful dataset.
qqnorm(faithful$eruptions)
qqline(faithful$eruptions)

# Figure: Scatterplot of length of eruptions vs. length of waiting time between
# eruptions from the faithful dataset.
plot(faithful$waiting ~ faithful$eruptions)
# Negative inverse-cube transformation of eruption length (-1 / x^3), stored
# as a new column so the raw values are kept alongside it.
faithful$eruptionstran <- -1 / (faithful$eruptions)^3

# Figure: Inverse Cubed Transformation QQ-Norm Plot of Eruptions from Faithful
# Dataset.
qqnorm(faithful$eruptionstran)
qqline(faithful$eruptionstran)
# Simple linear regression of eruption length on waiting time, using the
# untransformed data.
LM1 <- lm(eruptions ~ waiting, data = faithful)

# Figures: Diagnostic plots for untransformed data for Eruptions vs. Time
# Waiting. The rendered report showed four figures for this call, one per
# diagnostic plot.
plot(LM1)

# Model summary. Lines starting with "##" are console output that was captured
# when the script was run; they are not executed.
summary(LM1)
##
## Call:
## lm(formula = eruptions ~ waiting, data = faithful)
##
## Residuals:
##      Min       1Q   Median       3Q      Max
## -1.29917 -0.37689  0.03508  0.34909  1.19329
##
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.874016   0.160143  -11.70   <2e-16 ***
## waiting      0.075628   0.002219   34.09   <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4965 on 270 degrees of freedom
## Multiple R-squared: 0.8115, Adjusted R-squared: 0.8108
## F-statistic: 1162 on 1 and 270 DF, p-value: < 2.2e-16
# Figure: Length of eruption and time before an eruption from Old Faithful
# Geyser.
# Base plot object: eruption length on the x-axis, waiting time on the y-axis.
plott <- ggplot(faithful, aes(x = eruptions, y = waiting))

# Add the points, axis labels, title and panel styling; the resulting plot is
# auto-printed when the script is run at top level.
plott +
  geom_point(size = 0.8, colour = "palevioletred2") +
  ylab("time waiting for eruptions (min)") +
  xlab("length of eruption (min)") +
  ggtitle("Time Between Eruptions and the Duration of Old Faithful Eruptions") +
  theme(panel.background = element_rect(fill = "slategray1", colour = "black"))