storms <- read.csv("~/RPI/Classes/Design of Experiments/R/stormtracksTD.csv", header=TRUE)
#reads in the data from the csv file 'stormtracks.csv' and assigns it to the dataframe 'storm'
#To facilate the use of Taguchi design, the levels of each factor will be simplfiled into 3 or 4 factors.
storms$Year[as.numeric(storms$Year) >= 1995 & as.numeric(storms$Year) <= 1996] = "1"
storms$Year[as.numeric(storms$Year) >= 1996 & as.numeric(storms$Year) <= 1998] = "2"
storms$Year[as.numeric(storms$Year) >= 1999 & as.numeric(storms$Year) <= 2000] = "3"
storms$Month[as.numeric(storms$Month) >= 6 & as.numeric(storms$Month) <= 8] = "1"
storms$Month[as.numeric(storms$Month) >= 9 & as.numeric(storms$Month) <= 10] = "2"
storms$Month[as.numeric(storms$Month) >= 11 & as.numeric(storms$Month) <= 12] = "3"
storms$Day[as.numeric(storms$Day) >= 1 & as.numeric(storms$Day) <= 10] = "1"
storms$Day[as.numeric(storms$Day) >= 11 & as.numeric(storms$Day) <= 20] = "2"
storms$Day[as.numeric(storms$Day) >= 21 & as.numeric(storms$Day) <= 31] = "3"
storms$Hour[as.numeric(storms$Hour) >= 0 & as.numeric(storms$Hour) <= 6] = "1"
storms$Hour[as.numeric(storms$Hour) >= 12 & as.numeric(storms$Hour) <=12] = "2"
storms$Hour[as.numeric(storms$Hour) >= 18 & as.numeric(storms$Hour) <=18] = "3"
storms$Day = as.factor(storms$Day)
storms$Year = as.factor(storms$Year)
storms$Month = as.factor(storms$Month)
storms$Hour = as.factor(storms$Hour)
storms$Type = as.factor(storms$Type)
#Organized the factors
head(storms)
## Year Month Day Hour Type Wind
## 1 1 1 1 1 1 30
## 2 1 1 1 1 1 30
## 3 1 1 1 2 2 35
## 4 1 1 1 3 2 40
## 5 1 1 1 1 2 50
## 6 1 1 1 1 2 60
#displays the first 6 sets of variables
tail(storms)
## Year Month Day Hour Type Wind
## 2740 3 2 3 1 2 50
## 2741 3 2 3 2 2 50
## 2742 3 2 3 3 2 45
## 2743 3 2 3 1 4 40
## 2744 3 2 3 1 4 40
## 2745 3 2 3 2 4 35
#displays the last 6 sets of variables
summary(storms)
## Year Month Day Hour Type Wind
## 1:1260 1:1054 1: 775 1:1357 1:511 Min. : 15.0
## 2: 667 2:1514 2: 835 2: 691 2:926 1st Qu.: 35.0
## 3: 818 3: 177 3:1135 3: 697 3:896 Median : 50.0
## 4:412 Mean : 54.7
## 3rd Qu.: 70.0
## Max. :155.0
#displays a summary of the variables
#Boxplots of the factors
boxplot(Wind~Year,data=storms, xlab="Year", ylab="Wind Speed (mph)")
title("Storm Wind Speed Based on Year")
boxplot(Wind~Month,data=storms, xlab="Month", ylab="Wind Speed (mph)")
title("Storm Wind Speed Based on Month")
boxplot(Wind~Day,data=storms, xlab="Day", ylab="Wind Speed (mph)")
title("Storm Wind Speed Based on Day")
boxplot(Wind~Hour,data=storms, xlab="Hour", ylab="Wind Speed (mph)")
title("Storm Wind Speed Based on Hour")
boxplot(Wind~Type,data=storms, xlab="Type", ylab="Wind Speed (mph)")
title("Storm Wind Speed Based on Type")
model = aov(Wind~Year+Month+Day+Hour+Type,data=storms)
anova(model)
## Analysis of Variance Table
##
## Response: Wind
## Df Sum Sq Mean Sq F value Pr(>F)
## Year 2 3874 1937 10.5567 2.71e-05 ***
## Month 2 23650 11825 64.4475 < 2.2e-16 ***
## Day 2 15817 7909 43.1015 < 2.2e-16 ***
## Hour 2 157 79 0.4291 0.6511
## Type 3 1288267 429422 2340.3581 < 2.2e-16 ***
## Residuals 2733 501467 183
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#preforms an anova that analyses the variation of wind speed based on the variation in the five factor in this recipe.
library(DoE.base)
## Loading required package: grid
## Loading required package: conf.design
##
## Attaching package: 'DoE.base'
##
## The following objects are masked from 'package:stats':
##
## aov, lm
##
## The following object is masked from 'package:graphics':
##
## plot.design
TD = oa.design(factor.names=c("Year","Month","Day","Hour","Type"), nlevels=c(3,3,3,3,4))
## The columns of the array have been used in order of appearance.
## For designs with relatively few columns,
## the properties can sometimes be substantially improved
## using option columns with min3 or even min34.
#create the design
#param.design kept on producing an error
#used oa.design instead
TDstorms = merge(TD,storms,by=c("Year","Month","Day","Hour","Type"),all = FALSE)
#merge the TD with the orginal data set
uniqueTDstorms = unique(TDstorms[ , 1:5])
#identify the unique values
uniqueTDstorms
## Year Month Day Hour Type
## 1 1 1 1 1 1
## 19 1 1 1 1 2
## 38 1 1 2 2 3
## 49 1 1 2 2 4
## 52 1 2 1 3 1
## 67 1 2 1 3 2
## 84 1 2 3 1 3
## 113 1 2 3 1 4
## 136 1 3 2 3 1
## 139 1 3 2 3 2
## 141 2 1 1 3 4
## 142 2 1 3 1 1
## 156 2 1 3 1 2
## 175 2 2 2 2 1
## 182 2 2 2 2 2
## 191 2 2 3 3 3
## 218 2 2 3 3 4
## 221 2 3 1 2 4
## 228 3 1 2 3 3
## 234 3 1 3 2 1
## 237 3 1 3 2 2
## 250 3 2 1 2 1
## 256 3 2 1 2 2
## 270 3 2 2 1 3
## 317 3 2 2 1 4
## 332 3 3 3 3 1
#display the unique vales for reference
wind = TDstorms$Wind[index=c(1,19,38,49,52,67,84,113,136,139,141,142,156,175,182,191,218,221,228,234,237,250,256,270,317,332)]
#select the wind data that corresponds the the unique rows
newTDstorms = cbind(uniqueTDstorms,wind)
#combines the unique data with the corresponding wind data
newTDstorms
## Year Month Day Hour Type wind
## 1 1 1 1 1 1 20
## 19 1 1 1 1 2 55
## 38 1 1 2 2 3 65
## 49 1 1 2 2 4 50
## 52 1 2 1 3 1 30
## 67 1 2 1 3 2 45
## 84 1 2 3 1 3 70
## 113 1 2 3 1 4 55
## 136 1 3 2 3 1 30
## 139 1 3 2 3 2 55
## 141 2 1 1 3 4 25
## 142 2 1 3 1 1 25
## 156 2 1 3 1 2 45
## 175 2 2 2 2 1 30
## 182 2 2 2 2 2 45
## 191 2 2 3 3 3 65
## 218 2 2 3 3 4 35
## 221 2 3 1 2 4 50
## 228 3 1 2 3 3 65
## 234 3 1 3 2 1 25
## 237 3 1 3 2 2 55
## 250 3 2 1 2 1 20
## 256 3 2 1 2 2 40
## 270 3 2 2 1 3 95
## 317 3 2 2 1 4 80
## 332 3 3 3 3 1 25
#displays the TD design
snratio =-10*log10(1/newTDstorms$wind^2)
#calculates all of the S/N ratios
maxsnratio = which(snratio==max(snratio))
maxsnratio
## [1] 24
#determines and displays the maximum S/N ration
newTDstorms[maxsnratio, ]
## Year Month Day Hour Type wind
## 270 3 2 2 1 3 95
#displays the row that produces the maximum S/N ratio
TDmodel = aov(wind~Year+Month+Day+Hour+Type,data=newTDstorms)
anova(TDmodel)
## Analysis of Variance Table
##
## Response: wind
## Df Sum Sq Mean Sq F value Pr(>F)
## Year 2 481.0 240.50 4.0525 0.0408762 *
## Month 2 548.0 274.01 4.6171 0.0288409 *
## Day 2 2085.9 1042.95 17.5736 0.0001522 ***
## Hour 2 970.0 485.00 8.1721 0.0044500 **
## Type 3 4699.6 1566.53 26.3958 5.058e-06 ***
## Residuals 14 830.9 59.35
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
shapiro.test(newTDstorms$wind)
##
## Shapiro-Wilk normality test
##
## data: newTDstorms$wind
## W = 0.9413, p-value = 0.1445
#perform a shapiro-wilk test
#null hypothesis not rejected (normal)
#Q-Q Plots
#Linear Model
qqnorm(residuals(TDmodel), main="Normal Q-Q Plot", ylab="Wind Speed (mph) Residuals")
qqline(residuals(TDmodel))
#produces a Q-Q normal plot with a normal fit line
#R vs F plot
plot(fitted(TDmodel),residuals(TDmodel), main="Residual vs Fitted Plot")
#Produces a residual plot