cane <- read.csv("~/RPI/Classes/Design of Experiments/R/cane.csv", header=TRUE)
#reads in the data from the csv file 'cane.csv' and assigns it to the dataframe 'cane'
#Summary of Data
head(cane)
## District DistrictGroup DistrictPosition SoilID SoilName Area Variety
## 1 PineCreek 2 4 838 Prior 0.81 152
## 2 NorthBarron 5 3 802 Liverpool 9.87 124
## 3 NorthBarron 5 3 802 Liverpool 1.53 158
## 4 NorthBarron 5 3 802 Liverpool 3.79 138
## 5 NorthBarron 5 3 802 Liverpool 2.81 124
## 6 NorthBarron 5 3 802 Liverpool 6.44 124
## Ratoon Age HarvestMonth HarvestDuration Amount Fibre Sugar
## 1 1R 1 6 3 47.09 17.20000 10.86667
## 2 1R 1 8 55 1243.41 15.70588 12.18882
## 3 2R 2 9 0 141.56 15.20000 13.75000
## 4 2R 2 9 0 310.69 15.40000 13.41333
## 5 2R 2 9 1 219.02 14.80000 12.70333
## 6 PL 0 7 5 902.46 15.22000 11.98300
#displays the first 6 sets of variables
tail(cane)
## District DistrictGroup DistrictPosition SoilID SoilName Area
## 3770 EastMulgrave 1 1 750 Innisfail 2.06
## 3771 EastMulgrave 1 1 748 Thorpe 0.56
## 3772 EastMulgrave 1 1 748 Thorpe 0.17
## 3773 EastMulgrave 1 1 748 Thorpe 1.37
## 3774 EastMulgrave 1 1 748 Thorpe 2.22
## 3775 Edmonton 2 2 827 Edmonton 0.24
## Variety Ratoon Age HarvestMonth HarvestDuration Amount Fibre Sugar
## 3770 152 2R 2 7 2 317.47 15.450 9.9075
## 3771 158 2R 2 7 0 60.37 16.000 10.5900
## 3772 138 5R 5 8 0 18.17 15.710 12.3700
## 3773 120 4R 4 7 0 128.73 15.000 11.3200
## 3774 138 4R 4 7 2 206.91 14.925 9.7975
## 3775 158 PL 0 7 0 7.69 15.860 11.5700
#displays the last 6 sets of variables
summary(cane)
## District DistrictGroup DistrictPosition SoilID
## WrightsCreek : 389 Min. :1.000 Min. :1.000 Min. :442.0
## Highleigh : 360 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:712.0
## PineCreek : 317 Median :2.000 Median :4.000 Median :801.0
## LittleMulgrave: 308 Mean :2.697 Mean :3.703 Mean :757.5
## Aloomba : 284 3rd Qu.:3.000 3rd Qu.:5.000 3rd Qu.:816.0
## Hambledon : 267 Max. :5.000 Max. :5.000 Max. :838.0
## (Other) :1850
## SoilName Area Variety Ratoon
## Liverpool: 737 Min. : 0.020 138 :629 1R :767
## Mission : 399 1st Qu.: 0.880 120 :598 2R :760
## Innisfail: 330 Median : 1.940 152 :513 3R :692
## Virgil : 272 Mean : 2.578 124 :466 4R :493
## Thorpe : 237 3rd Qu.: 3.620 113 :358 PL :360
## Edmonton : 220 Max. :38.270 117 :319 RP :304
## (Other) :1580 (Other):892 (Other):399
## Age HarvestMonth HarvestDuration Amount
## Min. :0.000 Min. : 6.0 Min. : 0.000 Min. : 1.45
## 1st Qu.:1.000 1st Qu.: 7.0 1st Qu.: 0.000 1st Qu.: 75.54
## Median :2.000 Median : 9.0 Median : 1.000 Median : 173.46
## Mean :2.151 Mean : 8.6 Mean : 9.175 Mean : 240.11
## 3rd Qu.:3.000 3rd Qu.:10.0 3rd Qu.: 3.000 3rd Qu.: 336.40
## Max. :8.000 Max. :11.0 Max. :155.000 Max. :1954.01
##
## Fibre Sugar
## Min. :14.20 Min. : 6.08
## 1st Qu.:15.38 1st Qu.:10.93
## Median :15.80 Median :11.84
## Mean :15.87 Mean :11.82
## 3rd Qu.:16.25 3rd Qu.:12.73
## Max. :19.10 Max. :17.36
##
#displays a summary of the variables
#Make the target factors categorical
cane$group=as.factor(cane$DistrictGroup)
cane$position=as.factor(cane$DistrictPosition)
cane$age=as.factor(cane$Age)
cane$month=as.factor(cane$HarvestMonth)
#Boxplots of the factors
boxplot(Amount~group,data=cane, xlab="District Group", ylab="Amount of Sugar Cane (tonnes/hectare)")
title("Sugar Cane Yield Based on District Group")
boxplot(Amount~position,data=cane, xlab="District Position", ylab="Amount of Sugar Cane (tonnes/hectare)")
title("Sugar Cane Yield Based on District Position")
boxplot(Amount~age,data=cane, xlab="Age (years)", ylab="Amount of Sugar Cane (tonnes/hectare)")
title("Sugar Cane Yield Based on Age")
boxplot(Amount~month,data=cane, xlab="Harvest Month", ylab="Amount of Sugar Cane (tonnes/hectare)")
title("Sugar Cane Yield Based on Harvest Month")
library(rsm)
#load in the rsm package with the rsm function
lmodel=rsm(Amount~SO(DistrictGroup,DistrictPosition,Age,HarvestMonth), data=cane)
#create a linear model for the dataset
summary(lmodel)
##
## Call:
## rsm(formula = Amount ~ SO(DistrictGroup, DistrictPosition, Age,
## HarvestMonth), data = cane)
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.45668 153.30535 -0.2769 0.7818399
## DistrictGroup 31.44057 32.57864 0.9651 0.3345733
## DistrictPosition -53.50955 28.14896 -1.9009 0.0573860 .
## Age -19.26658 15.48683 -1.2441 0.2135541
## HarvestMonth 90.03525 31.76654 2.8343 0.0046174 **
## DistrictGroup:DistrictPosition -2.22788 3.94027 -0.5654 0.5718267
## DistrictGroup:Age 0.96478 2.05504 0.4695 0.6387616
## DistrictGroup:HarvestMonth 1.15884 2.10000 0.5518 0.5810983
## DistrictPosition:Age 1.49888 1.60263 0.9353 0.3497128
## DistrictPosition:HarvestMonth 0.69399 1.72068 0.4033 0.6867361
## Age:HarvestMonth 0.85632 1.45668 0.5879 0.5566651
## DistrictGroup^2 -3.64974 3.45072 -1.0577 0.2902721
## DistrictPosition^2 5.69977 4.42858 1.2870 0.1981590
## Age^2 0.39098 1.16802 0.3347 0.7378403
## HarvestMonth^2 -6.00654 1.76944 -3.3946 0.0006944 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Multiple R-squared: 0.01129, Adjusted R-squared: 0.00761
## F-statistic: 3.067 on 14 and 3760 DF, p-value: 9.337e-05
##
## Analysis of Variance Table
##
## Response: Amount
## Df Sum Sq
## FO(DistrictGroup, DistrictPosition, Age, HarvestMonth) 4 1275128
## TWI(DistrictGroup, DistrictPosition, Age, HarvestMonth) 6 154862
## PQ(DistrictGroup, DistrictPosition, Age, HarvestMonth) 4 709952
## Residuals 3760 187375478
## Lack of fit 311 16929613
## Pure error 3449 170445865
## Mean Sq F value
## FO(DistrictGroup, DistrictPosition, Age, HarvestMonth) 318782 6.3969
## TWI(DistrictGroup, DistrictPosition, Age, HarvestMonth) 25810 0.5179
## PQ(DistrictGroup, DistrictPosition, Age, HarvestMonth) 177488 3.5616
## Residuals 49834
## Lack of fit 54436 1.1015
## Pure error 49419
## Pr(>F)
## FO(DistrictGroup, DistrictPosition, Age, HarvestMonth) 3.977e-05
## TWI(DistrictGroup, DistrictPosition, Age, HarvestMonth) 0.795183
## PQ(DistrictGroup, DistrictPosition, Age, HarvestMonth) 0.006616
## Residuals
## Lack of fit 0.116342
## Pure error
##
## Stationary point of response surface:
## DistrictGroup DistrictPosition Age HarvestMonth
## 4.2519534 4.8950659 0.9679765 8.2567187
##
## Eigenanalysis:
## $values
## [1] 5.9261605 0.4157705 -3.7254080 -6.1820516
##
## $vectors
## [,1] [,2] [,3] [,4]
## DistrictGroup 0.10661955 -0.15687847 0.9532444 0.23525840
## DistrictPosition -0.98584052 0.11055463 0.1166557 0.04782886
## Age -0.12635427 -0.97866010 -0.1569140 0.04045989
## HarvestMonth -0.02802395 -0.07342498 0.2304225 -0.96991179
#display the model
# Display contour plots for all factor interactions
par(mfrow=c(1,1))
contour(lmodel, ~DistrictGroup+DistrictPosition+Age+HarvestMonth, image=TRUE, at=summary(lmodel$canonical$xs))
#Display Perspective Plots for all factor interactions
library(rgl)
#load rsl package with persp function
par(mfrow=c(1,1))
persp(lmodel,~DistrictGroup+DistrictPosition, image = TRUE, at = c(summary(lmodel)$canonical$xs, Block="B2"), contour="colors", zlab="Amount (tonnes/hectare)", theta=30)
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
persp(lmodel,~DistrictGroup+Age, image = TRUE, at = c(summary(lmodel)$canonical$xs, Block="B2"), contour="colors", zlab="Amount (tonnes/hectare)" , theta=30)
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
persp(lmodel,~DistrictGroup+HarvestMonth, image = TRUE, at = c(summary(lmodel)$canonical$xs, Block="B2"), contour="colors", zlab="Amount (tonnes/hectare)", theta=30)
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
persp(lmodel,~DistrictPosition+Age, image = TRUE, at = c(summary(lmodel)$canonical$xs, Block="B2"), contour="colors", zlab="Amount (tonnes/hectare)", theta=30)
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
persp(lmodel,~DistrictPosition+HarvestMonth, image = TRUE, at = c(summary(lmodel)$canonical$xs, Block="B2"), contour="colors", zlab="Amount (tonnes/hectare)", theta=30)
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
persp(lmodel,~Age+HarvestMonth, image = TRUE, at = c(summary(lmodel)$canonical$xs, Block="B2"), contour="colors", zlab="Amount (tonnes/hectare)", theta=30)
## Warning in persp.default(dat$x, dat$y, dat$z, zlim = dat$zlim, theta =
## theta, : "image" is not a graphical parameter
## Warning in persp.default(dat$x, dat$y, dat$z, xlab = dat$labs[1], ylab =
## dat$labs[2], : "image" is not a graphical parameter
## Warning in title(sub = dat$labs[5], ...): "image" is not a graphical
## parameter
shapiro.test(cane$Amount)
##
## Shapiro-Wilk normality test
##
## data: cane$Amount
## W = 0.8227, p-value < 2.2e-16
#perform a shapiro-wilk test
#null hypothesis is rejected
#need to transform the data
shapiro.test(1/(cane$Amount))
##
## Shapiro-Wilk normality test
##
## data: 1/(cane$Amount)
## W = 0.405, p-value < 2.2e-16
#reciprical
#null hypothesis is rejected
shapiro.test((cane$Amount)^2)
##
## Shapiro-Wilk normality test
##
## data: (cane$Amount)^2
## W = 0.4568, p-value < 2.2e-16
#squared
#null hypothesis is rejected
shapiro.test(sqrt(cane$Amount))
##
## Shapiro-Wilk normality test
##
## data: sqrt(cane$Amount)
## W = 0.9609, p-value < 2.2e-16
#square root
#null hypothesis is rejected
shapiro.test(log(cane$Amount))
##
## Shapiro-Wilk normality test
##
## data: log(cane$Amount)
## W = 0.9857, p-value < 2.2e-16
#log
#null hypothesis is rejected
shapiro.test(log10(cane$Amount))
##
## Shapiro-Wilk normality test
##
## data: log10(cane$Amount)
## W = 0.9857, p-value < 2.2e-16
#log base 10
#null hypothesis is rejected
#Q-Q Plots
#Linear Model
qqnorm(residuals(lmodel), main="Normal Q-Q Plot", ylab="Amount (tonnes/hectare) Residuals")
qqline(residuals(lmodel))
#produces a Q-Q normal plot with a normal fit line
#R vs F plot
#Linear Model
plot(fitted(lmodel),residuals(lmodel), main="Residual vs Fitted Plot")
#Produces a residual plot