## Histograms of energy consumption and temperature
# Distribution of the response variable.
hist(
  launch$EnergyConsumption,
  main = "Energy consumption Histogram",
  xlab = "Energy Consumption",
  col = "light blue"
)
# Distribution of temperature (axis label spelling corrected to "Celsius").
hist(
  launch$Temperature,
  main = "Temperature",
  xlab = "Temperature in degree Celsius",
  col = "red"
)
library(ggplot2)
# Bar chart of energy consumption per occupancy level (occupancy is treated as
# a discrete category). geom_col() stacks rows that share an x value, so a bar
# shows the summed consumption of all rows at that level.
ggplot(
  data = launch,
  mapping = aes(x = factor(Occupancy), y = EnergyConsumption)
) +
  geom_col(fill = "orange") +
  # Zoom the y axis without dropping any data from the stacked bars.
  coord_cartesian(ylim = c(0, 10000)) +
  ggtitle("Energy consumption by Occupancy") +
  xlab("Occupancy") +
  ylab("Energy Consumption") +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
library(ggplot2)
# Bar chart of energy consumption split by the Holiday flag. geom_col() stacks
# rows that share an x value, so each bar shows the summed consumption.
ggplot(
  data = launch,
  mapping = aes(x = Holiday, y = EnergyConsumption)
) +
  geom_col(fill = "orange") +
  ggtitle("Energy consumption by Holiday") +
  xlab("Holiday") +
  ylab("Energy Consumption") +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
library(ggplot2)
# Scatter plot of energy consumption against temperature with a fitted
# linear-regression line overlaid.
ggplot(
  data = launch,
  mapping = aes(x = Temperature, y = EnergyConsumption)
) +
  geom_point(color = "blue") +
  # The smoother inherits x and y from the plot-level mapping above.
  stat_smooth(method = "lm", color = "red") +
  labs(
    title = "Relation between Temperature vs Energy Consumption",
    x = "Temperature",
    y = "EnergyConsumption"
  ) +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula = 'y ~ x'
library(ggplot2)
# Bar chart of energy consumption per day of the week. geom_col() stacks rows
# that share an x value, so each bar shows the summed consumption for that day.
ggplot(
  data = launch,
  mapping = aes(x = DayOfWeek, y = EnergyConsumption)
) +
  geom_col(fill = "orange") +
  # Zoom the y axis without dropping any data from the stacked bars.
  coord_cartesian(ylim = c(0, 12500)) +
  ggtitle("Energy consumption by days of week") +
  xlab("Days of Week") +
  ylab("Energy Consumption") +
  # Centre the title.
  theme(plot.title = element_text(hjust = 0.5))
## Correlation matrix between all numeric variables
library(ggplot2)
library(reshape2)
# Select numeric columns. vapply() always yields a logical mask (sapply() can
# change its return type on unusual input), and drop = FALSE keeps a data frame
# even if only one column is numeric, so cor() still receives tabular input.
num_cols <- launch[
  ,
  vapply(launch, is.numeric, logical(1)),
  drop = FALSE
]
# Pairwise Pearson correlation matrix of the numeric columns
corr <- cor(num_cols, method = "pearson")
# Convert the square matrix to long format: one row per (Var1, Var2, value)
corr_long <- melt(corr)
# Heat map with the rounded coefficient printed inside every tile
ggplot(corr_long, aes(Var1, Var2, fill = value)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(value, 2)), color = "black") +
  scale_fill_gradient(low = "lightblue", high = "grey") +
  labs(
    title = "Correlation Matrix",
    x = "",
    y = ""
  ) +
  theme_minimal() +
  theme(
    # Slant the x labels so long variable names do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )
## Simple additive linear model: fit and summary
# Main effects only; no interaction terms.
model <- lm(
  formula = EnergyConsumption ~ Temperature + Humidity + Occupancy +
    RenewableEnergy + HVACUsage_num + LightingUsage,
  data = launch
)
summary(model)
##
## Call:
## lm(formula = EnergyConsumption ~ Temperature + Humidity + Occupancy +
## RenewableEnergy + HVACUsage_num + LightingUsage, data = launch)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.8658 -3.5656 0.3398 3.3712 15.7656
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.45238 1.74104 12.896 < 2e-16 ***
## Temperature 1.99349 0.05623 35.450 < 2e-16 ***
## Humidity -0.04019 0.01882 -2.136 0.0329 *
## Occupancy 0.52807 0.05565 9.489 < 2e-16 ***
## RenewableEnergy 0.07274 0.01824 3.988 7.16e-05 ***
## HVACUsage_num 4.61272 0.32066 14.385 < 2e-16 ***
## LightingUsageOn 1.70498 0.31943 5.338 1.17e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.039 on 993 degrees of freedom
## Multiple R-squared: 0.6195, Adjusted R-squared: 0.6172
## F-statistic: 269.4 on 6 and 993 DF, p-value: < 2.2e-16
# Base-R diagnostic plots for the additive linear model fitted above
# (plot() dispatches to the lm method).
plot(model)
## Linear model with interaction terms: fit and summary
# `a * b` expands to a + b + a:b, so HVAC usage enters as a main effect and as
# an interaction with both Temperature and Occupancy.
model_int <- lm(
  formula = EnergyConsumption ~
    HVACUsage_num * Temperature +
    HVACUsage_num * Occupancy +
    Humidity + RenewableEnergy + DayOfWeek + Holiday,
  data = launch
)
summary(model_int)
##
## Call:
## lm(formula = EnergyConsumption ~ HVACUsage_num * Temperature +
## HVACUsage_num * Occupancy + Humidity + RenewableEnergy +
## DayOfWeek + Holiday, data = launch)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.536 -3.726 0.202 3.443 15.210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.04753 2.28865 10.070 < 2e-16 ***
## HVACUsage_num 4.78217 2.94808 1.622 0.105096
## Temperature 1.99121 0.07979 24.955 < 2e-16 ***
## Occupancy 0.58537 0.07889 7.420 2.53e-13 ***
## Humidity -0.04061 0.01919 -2.116 0.034567 *
## RenewableEnergy 0.07078 0.01866 3.794 0.000158 ***
## DayOfWeekMonday -0.27868 0.61246 -0.455 0.649190
## DayOfWeekSaturday -0.06760 0.58898 -0.115 0.908647
## DayOfWeekSunday -0.24304 0.57664 -0.421 0.673500
## DayOfWeekThursday 0.31685 0.58604 0.541 0.588862
## DayOfWeekTuesday -0.04553 0.58496 -0.078 0.937970
## DayOfWeekWednesday -0.01021 0.61569 -0.017 0.986768
## HolidayYes 0.39790 0.32738 1.215 0.224505
## HVACUsage_num:Temperature 0.01194 0.11536 0.103 0.917621
## HVACUsage_num:Occupancy -0.12664 0.11352 -1.116 0.264845
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.121 on 985 degrees of freedom
## Multiple R-squared: 0.6101, Adjusted R-squared: 0.6046
## F-statistic: 110.1 on 14 and 985 DF, p-value: < 2.2e-16
# Base-R diagnostic plots for the interaction model.
plot(model_int)
## Comparison between the two different models
# AIC() called with several fitted models returns a table with one row per
# model (df and AIC); the lower AIC indicates the preferred model.
AIC(model, model_int)
# Random forest
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# Fix the RNG seed: a random forest draws bootstrap samples and a random subset
# of predictors at every split, so without a seed the reported error and the
# variable importances change from run to run.
set.seed(42)
# Regression forest on the same predictors as the additive linear model.
# importance = TRUE stores the permutation importances used further below.
rf_model <- randomForest(
  EnergyConsumption ~ Temperature + Humidity + Occupancy +
    RenewableEnergy + HVACUsage_num + LightingUsage,
  data = launch,
  ntree = 300,
  mtry = 3,
  importance = TRUE
)
print(rf_model)
##
## Call:
## randomForest(formula = EnergyConsumption ~ Temperature + Humidity + Occupancy + RenewableEnergy + HVACUsage_num + LightingUsage, data = launch, ntree = 300, mtry = 3, importance = TRUE)
## Type of random forest: regression
## Number of trees: 300
## No. of variables tried at each split: 3
##
## Mean of squared residuals: 28.65399
## % Var explained: 56.76
# Variable-importance chart for the forest; type = 1 selects the
# permutation-based measure (mean decrease in accuracy).
varImpPlot(
  rf_model,
  type = 1,
  main = "Feature Importance - Random Forest",
  col = "blue"
)
# Predictions on the training data (no newdata supplied).
# NOTE(review): for lm this gives in-sample fitted values, whereas
# predict() on a randomForest without newdata gives out-of-bag predictions,
# so the two error figures computed from them are not strictly like-for-like.
model_pred <- predict(model)
rf_pred <- predict(rf_model)
# Root-mean-squared error between observed and predicted values.
#
# Args:
#   actual:    numeric vector of observed values.
#   predicted: numeric vector of predictions, aligned with `actual`.
#   na.rm:     if TRUE, pairs whose squared error is NA are ignored.
#
# Returns:
#   A single non-negative number: sqrt(mean((actual - predicted)^2)).
rmse <- function(actual, predicted, na.rm = FALSE) {
  # Square each residual BEFORE averaging. Squaring the mean residual instead
  # lets positive and negative errors cancel and yields |mean error|.
  sqrt(mean((actual - predicted)^2, na.rm = na.rm))
}
# Report both error figures; each cat() ends with a newline so that the two
# reports (and anything printed afterwards) start on their own line.
# NOTE(review): the recorded values below are implausibly small for an RMSE
# given the residual standard error (~5.04) printed by summary(model) -
# verify the rmse() implementation and re-run before quoting them.
cat("Linear Regression RMSE:\n", rmse(launch$EnergyConsumption, model_pred), "\n")
## Linear Regression RMSE:
## 2.083397e-14
cat("Random Forest RMSE:\n", rmse(launch$EnergyConsumption, rf_pred), "\n")
## Random Forest RMSE:
## 0.001161292