library(readxl)
library(ggcorrplot)
library(ggplot2)
library(gridExtra)
# load "Historical weather around Szeged, Hungary (from 2006 to 2016)"
# url = "https://www.kaggle.com/budincsevity/szeged-weather"
# Read the Szeged weather export into Data1.
# The file location is spelled out with file.path() rather than switching the
# session's working directory with setwd(), so loading the data leaves no
# lasting side effect on the R session.
data_dir <- file.path("~", "R", "WeatherHistLinReg")
Data1 <- read.csv(file.path(data_dir, "weatherHistory.csv"))
# Weather in Szeged 2006-2016: Is there a relationship between humidity and
# temperature? What about between humidity and apparent temperature? Can you
# predict the apparent temperature given the humidity?
# Perform EDA & Correlation Analysis
# Keep only columns 4 to 6 of the raw data (the predictor and the two outcomes)
# and give them short names in one step.
Data2 <- setNames(Data1[, 4:6], c("Temp", "AppTemp", "Humidity"))

# Pairwise correlations rounded to one decimal place, then drawn as a
# lower-triangle correlogram with the coefficients printed in the cells.
Data3 <- round(cor(Data2), digits = 1)
ggcorrplot(
  Data3,
  hc.order = TRUE,
  type = "lower",
  lab = TRUE
)
# Build predictive models using "Humidity" as the predictor and "Temperature"
# and "Apparent Temperature" as the outcomes.
# Two simple linear regressions sharing the same predictor (Humidity):
# one for the measured temperature, one for the apparent temperature.
TempFit <- lm(
  formula = Temp ~ Humidity,
  data = Data2
)
AppTempFit <- lm(
  formula = AppTemp ~ Humidity,
  data = Data2
)
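# Get a 95% confidence interval for the expected temperatures at the average
# humidity level. With humidity centred on its mean, the "(Intercept)" row of
# each interval table below is that interval.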
# Refit both models with humidity centred on its mean. After centring, the
# intercept is the fitted outcome at the average humidity, so its confidence
# interval is the interval of interest.
TempFit1 <- lm(
  formula = Temp ~ I(Humidity - mean(Humidity)),
  data = Data2
)
AppTempFit1 <- lm(
  formula = AppTemp ~ I(Humidity - mean(Humidity)),
  data = Data2
)

# Print the 95% intervals for both models as a named list.
list(
  "Temperature" = confint(TempFit1, level = 0.95),
  "Apparent Temperature" = confint(AppTempFit1, level = 0.95)
)
## $Temperature
## 2.5 % 97.5 %
## (Intercept) 11.88598 11.97938
## I(Humidity - mean(Humidity)) -31.13331 -30.65546
##
## $`Apparent Temperature`
## 2.5 % 97.5 %
## (Intercept) 10.80115 10.90890
## I(Humidity - mean(Humidity)) -33.25009 -32.69885
# Visualize the linear regression models.
# Draw the two scatter plots side by side, each overlaid with its fitted
# regression line. par() returns the previous settings; they are restored at
# the end so the two-panel layout does not carry over to later plots.
old_par <- par(mfrow = c(1, 2))

# Left panel: temperature against humidity.
plot(Data2$Humidity, Data2$Temp,
     xlab = "Humidity", ylab = "Temperature", col = "grey")
abline(TempFit, col = "red", lwd = 2)

# Shared heading for both panels: outer = TRUE places it in the outer margin,
# and line = -1.5 shifts it inward from the margin edge. TRUE is spelled out
# because the shorthand T is an ordinary variable that can be reassigned.
title(list("Temperature Prediction Given Humidity",
           cex = 1.5, col = "grey40", font = 6),
      line = -1.5, outer = TRUE)

# Right panel: apparent temperature against humidity.
plot(Data2$Humidity, Data2$AppTemp,
     xlab = "Humidity", ylab = "Apparent Temperature", col = "grey")
abline(AppTempFit, col = "red", lwd = 2)

par(old_par)
# Give temperature predictions for a humidity of 0.5.
# Point predictions with prediction intervals for a single new observation
# at humidity 0.5, from each of the two fitted models.
humidity_half <- data.frame(Humidity = 0.5)
Temperature <- predict(TempFit,
  newdata = humidity_half,
  interval = "prediction"
)
ApparentTemp <- predict(AppTempFit,
  newdata = humidity_half,
  interval = "prediction"
)

# Stack the two one-row results and label the rows by outcome.
Prediction <- rbind(Temperature, ApparentTemp)
rownames(Prediction) <- c("Temperature", "ApparentTemp")

# Append Fahrenheit versions (value * 1.8 + 32) of the fit and both bounds,
# named after the source column with an "F" suffix.
Prediction1 <- as.data.frame(Prediction)
celsius_cols <- c("fit", "lwr", "upr")
Prediction1[paste0(celsius_cols, "F")] <- Prediction1[celsius_cols] * 1.8 + 32
Prediction1
## fit lwr upr fitF lwrF uprF
## Temperature 19.18974 4.685245 33.69423 66.54153 40.43344 92.64961
## ApparentTemp 18.60070 1.868403 35.33299 65.48126 35.36313 95.59939
# Now check the predictions against a manual calculation using the linear
# formula y = mx + b, i.e. y = Coefficient * predictor + Intercept.
# Pull the intercept and slope out of each model and evaluate
# slope * 0.5 + intercept by hand; the results should match the "fit"
# column of the prediction table.
coeffsT <- coef(TempFit)
coeffsAT <- coef(AppTempFit)
coeffsT["Humidity"] * 0.5 + coeffsT["(Intercept)"]
## Humidity
## 19.18974
coeffsAT["Humidity"] * 0.5 + coeffsAT["(Intercept)"]
## Humidity
## 18.6007