library(forecast)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: timeDate
## This is forecast 7.3
library(ggplot2)
library(caret)
## Warning: package 'caret' was built under R version 3.3.3
## Loading required package: lattice
library(readxl)
Epidemic <- read_excel("~/rProjects/Assignment_7/PowderyMildewEpidemic.xlsx")
# Plot the binary values of epidemic outbreaks using different colors with Max Temp on y-axis and Relative Humidity on x-axis
# Make the points solid squares (pch=15)
plot(Epidemic$MaxTemp ~ Epidemic$RelHumidity, xlab="Relative Humidity", ylab="Maximum Temperature", bty="l", col = Epidemic$Binary+1, pch=15)
# Add a legend
legend(60, 28, c("Outbreak", "No Outbreak"), col=2:1, pch=15)
# Plot the binary values of epidemic outbreaks using different colors with Max Temp on x-axis and Relative Humidity on y-axis
# Make the points solid squares (pch=15)
plot(Epidemic$RelHumidity ~ Epidemic$MaxTemp, xlab="Maximum Temperature", ylab="Relative Humitidy", bty="l", col = Epidemic$Binary+1, pch=15)
# Add a legend
legend(27, 70, c("Outbreak", "No Outbreak"), col=2:1, pch=15)
# Create a classification matrix to test the roll forward forecast for accuracy
confusionMatrix(naiveForecasts, Epidemic$Binary[(length(Epidemic$Binary)-3):length(Epidemic$Binary)], positive=c("1"))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 1 1
## 1 2 0
##
## Accuracy : 0.25
## 95% CI : (0.0063, 0.8059)
## No Information Rate : 0.75
## P-Value [Acc > NIR] : 0.9961
##
## Kappa : -0.5
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.0000
## Specificity : 0.3333
## Pos Pred Value : 0.0000
## Neg Pred Value : 0.5000
## Prevalence : 0.2500
## Detection Rate : 0.0000
## Detection Prevalence : 0.5000
## Balanced Accuracy : 0.1667
##
## 'Positive' Class : 1
##
# First eight data points to be used for training period
training <- Epidemic[1:8, ]
# Look at it to make sure it did what we wanted
training
## # A tibble: 8 × 5
## Year Outbreak Binary MaxTemp RelHumidity
## <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1987 Yes 1 30.14 82.86
## 2 1988 No 0 30.66 79.57
## 3 1989 No 0 26.31 89.14
## 4 1990 Yes 1 28.43 91.00
## 5 1991 No 0 29.57 80.57
## 6 1992 Yes 1 31.25 67.82
## 7 1993 No 0 30.35 61.76
## 8 1994 Yes 1 30.71 81.14
# Fit a logistic regression model to the training period
outLogReg <- glm(Binary ~ MaxTemp + RelHumidity, data=training, family="binomial")
# Look at the summary
summary(outLogReg)
##
## Call:
## glm(formula = Binary ~ MaxTemp + RelHumidity, family = "binomial",
## data = training)
##
## Deviance Residuals:
## 1 2 3 4 5 6 7 8
## 0.7466 -1.7276 -0.3132 1.0552 -1.1419 1.2419 -0.3908 0.6060
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -56.1543 44.4573 -1.263 0.207
## MaxTemp 1.3849 1.1406 1.214 0.225
## RelHumidity 0.1877 0.1578 1.189 0.234
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 11.0904 on 7 degrees of freedom
## Residual deviance: 8.1198 on 5 degrees of freedom
## AIC: 14.12
##
## Number of Fisher Scoring iterations: 5
# Generate predictions using the fitted logistic regression model created above
predictions <- predict(outLogReg, Epidemic[9:12,], type="response")
# View Predictions for entire validation period
# This is the probability of the year having an epidemic of powdery mildew on mangos
predictions
## 1 2 3 4
## 0.1119407 0.7021411 0.5705413 0.3894790
# Generate predictions for only 1995
prediction95 <- predict(outLogReg, Epidemic[9,], type="response")
# View Predictions for 1995 only
# This is the probability of the year having an epidemic of powdery mildew on mangos
prediction95
## 1
## 0.1119407
# Generate the confusion matrix using .5 as the cutoff
confusionMatrix(ifelse(predictions > 0.5, 1, 0), Epidemic[9:12,]$Binary, positive=c("1"))
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 2 0
## 1 1 1
##
## Accuracy : 0.75
## 95% CI : (0.1941, 0.9937)
## No Information Rate : 0.75
## P-Value [Acc > NIR] : 0.7383
##
## Kappa : 0.5
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 1.0000
## Specificity : 0.6667
## Pos Pred Value : 0.5000
## Neg Pred Value : 1.0000
## Prevalence : 0.2500
## Detection Rate : 0.2500
## Detection Prevalence : 0.5000
## Balanced Accuracy : 0.8333
##
## 'Positive' Class : 1
##