Explore some visualizations of the data and then build predictive models that will demonstrate to the client (a home builder) how the data can be used to help a home owner make decisions about altering power consumption. To provide immediate value, we need to give the client five business suggestions based on the insights we gleaned from the analysis.
We first created some exploratory visualizations without a specific goal, simply to understand the data better. This is a good way to explore what can be done with the data and what limitations we may face.
# Subset a single winter day (11 February 2008) from the full data set.
Winter_day <- EnergyConsumption %>%
  filter(
    year(DateTime) == 2008,
    month(DateTime) == 2,
    day(DateTime) == 11
  )

# Interactive plotly line chart with one trace per sub-meter.
Winter_day %>%
  plot_ly(
    x = ~DateTime,
    y = ~Sub_metering_1,
    name = "Kitchen",
    type = "scatter",
    mode = "lines"
  ) %>%
  add_trace(y = ~Sub_metering_2, name = "Laundry Room", mode = "lines") %>%
  add_trace(y = ~Sub_metering_3, name = "Water Heater & AC", mode = "lines") %>%
  add_trace(y = ~Sub_metering_0, name = "Other Appliances", mode = "lines") %>%
  layout(
    title = "Power Consumption in a 2008 Winter Monday (11th Feb, 08)",
    xaxis = list(title = "Day"),
    yaxis = list(title = "Power (watt-hours)")
  )
# Visualizations ----
# Static ggplot2 version of the winter-day chart: one coloured line per
# sub-meter. The y axis is labelled in watt-hours (not kWh) so that it agrees
# with the plotly charts of the same sub-meter columns in this file.
ggplot(Winter_day, aes(x = DateTime)) +
  geom_line(aes(y = Sub_metering_1, color = 'Submeter 1')) +
  geom_line(aes(y = Sub_metering_2, color = 'Submeter 2')) +
  geom_line(aes(y = Sub_metering_3, color = 'Submeter 3')) +
  geom_line(aes(y = Sub_metering_0, color = 'Submeter 0')) +
  scale_colour_brewer(palette = "Set1") +
  theme_minimal() +
  labs(
    x = 'DateTime',
    y = 'Watt-hours',
    color = "Legend",
    title = 'Energy Consumption in a 2008 Winter Monday (11th Feb, 08)'
  )
###
## Subset 11 February 2008 at 10-minute frequency: keep only the readings
## whose minute is a multiple of 10 (0, 10, ..., 50).
houseDay10 <- filter(
  dfFullYr,
  Year == 2008 & Month == 2 & Day == 11 &
    minute(DateTime) %% 10 == 0
)
## Plot all four sub-meters with title, legend and labels - 10 Minute frequency.
## The title names the date the filter actually selects (11 Feb 2008).
plot_ly(houseDay10, x = ~DateTime, y = ~Sub_metering_1, name = 'Kitchen',
        type = 'scatter', mode = 'lines') %>%
  add_trace(y = ~Sub_metering_2, name = 'Laundry Room', mode = 'lines') %>%
  add_trace(y = ~Sub_metering_3, name = 'Water Heater & AC', mode = 'lines') %>%
  add_trace(y = ~Sub_metering_0, name = 'Other Appliances', mode = 'lines') %>%
  layout(title = "Power Consumption February 11th, 2008",
         xaxis = list(title = "Time"),
         yaxis = list(title = "Power (watt-hours)"))
A visualization showing the energy consumption over a week in Autumn of 2008
# One week of data for the plotly chart: 10-16 November 2008, all sub-meters.
# The filter uses the Year/Month/Day columns of dfFullYr so the rows match the
# week named in the plot title (the previous filter selected March 2007).
week08 <- filter(dfFullYr, Year == 2008 & Month == 11 & Day >= 10 & Day <= 16)
## Plot all four sub-meters with title, legend and labels - 1 week
plot_ly(week08, x = ~DateTime, y = ~Sub_metering_1, name = 'Kitchen',
        type = 'scatter', mode = 'lines') %>%
  add_trace(y = ~Sub_metering_2, name = 'Laundry Room', mode = 'lines') %>%
  add_trace(y = ~Sub_metering_3, name = 'Water Heater & AC', mode = 'lines') %>%
  add_trace(y = ~Sub_metering_0, name = 'Other Appliances', mode = 'lines') %>%
  layout(title = "Power Consumption Nov 10th - 16th, 2008",
         xaxis = list(title = "Day"),
         yaxis = list(title = "Power (watt-hours)"))
# Energy consumption by sub-meter over an autumn week (10-16 Nov 2008).
# The upper bound is the start of 17 Nov so that all of 16 Nov is kept,
# matching the plot title; the lower bound is inclusive so midnight on the
# 10th is not dropped.
Autumn_week <- EnergyConsumption %>%
  filter(DateTime >= "2008-11-10" & DateTime < "2008-11-17")
# One coloured line per sub-meter; y axis in watt-hours (not kWh), consistent
# with the plotly charts of the same columns in this file.
ggplot(Autumn_week, aes(x = DateTime)) +
  geom_line(aes(y = Sub_metering_1, color = 'Kitchen')) +
  geom_line(aes(y = Sub_metering_2, color = 'Laundry Room')) +
  geom_line(aes(y = Sub_metering_3, color = 'Water Heater & AC')) +
  geom_line(aes(y = Sub_metering_0, color = 'Other Appliances')) +
  scale_colour_brewer(palette = "Set1") +
  theme_minimal() +
  labs(
    x = 'DateTime',
    y = 'Watt-hours',
    color = "Legend",
    title = 'Power Consumption Nov 10th - 16th, 2008'
  )
A visualization showing the energy consumption in Spring of 2008
# Energy consumption in Spring 2008 (readings after 21 Mar and before 20 Jun).
Spring_Period <- EnergyConsumption %>%
  filter(DateTime > "2008-03-21" & DateTime < "2008-06-20")
# All four sub-meters overlaid on one panel; y axis in watt-hours (not kWh),
# consistent with the plotly charts of the same columns in this file.
ggplot(Spring_Period, aes(x = DateTime)) +
  geom_line(aes(y = Sub_metering_1, color = 'Submeter 1')) +
  geom_line(aes(y = Sub_metering_2, color = 'Submeter 2')) +
  geom_line(aes(y = Sub_metering_3, color = 'Submeter 3')) +
  geom_line(aes(y = Sub_metering_0, color = 'Submeter 0')) +
  theme_minimal() +
  scale_colour_brewer(palette = "Set1") +
  labs(
    x = 'DateTime',
    y = 'Watt-hours',
    color = "Legend",
    title = 'Energy Consumption in 2008 Spring'
  )
Since the combined plot has a lot of overlap and cannot be interpreted this way, we will split out each submeter and plot it on its own to better visualize the data.
# Split the Spring 2008 data by sub-meter and reshape it to long format
# (one row per timestamp and sub-meter) so each sub-meter gets its own facet.
# NOTE(review): DateTime itself is one of the grouping keys, so every group is
# a single timestamp and sum() only aggregates if DateTime has duplicate rows.
# Confirm whether an hourly roll-up was intended.
Spring_Period <- EnergyConsumption %>%
  filter(DateTime > "2008-03-21" & DateTime < "2008-06-20") %>%
  group_by(DateTime = DateTime,
           HOUR = hour(DateTime),
           DAY = day(DateTime),
           MONTH = month(DateTime)) %>%
  summarise(SM_Others = sum(Sub_metering_0),
            SM_Kitchen = sum(Sub_metering_1),
            SM_Laundry = sum(Sub_metering_2),
            SM_HeatAC = sum(Sub_metering_3)) %>%
  ungroup() %>%
  gather(key = "Submeters", value = "value",
         SM_Others, SM_Kitchen, SM_Laundry, SM_HeatAC)
# One facet row per sub-meter; the legend is hidden because the facet strips
# already name each series. The title names the period that is actually
# plotted (Spring 2008) and the y axis is in watt-hours rather than kWh.
ggplot(Spring_Period) +
  geom_line(aes(x = DateTime, y = value, color = Submeters)) +
  facet_grid(Submeters ~ .) +
  scale_colour_brewer(palette = "Set1") +
  labs(x = 'DateTime', y = 'Watt-hours', color = "Legend",
       title = 'Energy Consumption in 2008 Spring') +
  theme(legend.position = "none")
We will subset to one observation per week, taken on Mondays during the 8:00 PM hour (the filter keeps the 20:01 reading), for all 3 years; then autoplot the time series.
## Keep one reading per week: Mondays, hour 20, minute 1 (i.e. 20:01).
house070809weekly <- dfFullYr %>%
  filter(
    weekDay == "Monday",
    Hour == 20,
    minute(DateTime) == 1
  )
## Weekly time series for sub-meter 1: 52 observations per period,
## starting at period 1 of 2007.
tsSM1_070809weekly <- ts(
  house070809weekly[["Sub_metering_1"]],
  start = c(2007, 1),
  frequency = 52
)
## Labelled, coloured line chart of sub-meter 1 via autoplot
autoplot(
  tsSM1_070809weekly,
  ts.colour = 'red',
  xlab = "Time",
  ylab = "Watt Hours",
  main = "Kitchen"
)
## The same series drawn with base plot.ts
plot.ts(tsSM1_070809weekly)
Apply time series linear regression to the sub-meter 1 ts object and use summary to obtain R2 and RMSE from the model you built
## Time series linear regression of sub-meter 1 on a linear trend plus
## seasonal dummies, followed by the standard model summary.
fitSM1 <- tslm(formula = tsSM1_070809weekly ~ trend + season)
summary(fitSM1)
##
## Call:
## tslm(formula = tsSM1_070809weekly ~ trend + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.0000 -0.3832 0.0000 0.3832 25.6168
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.821e-01 1.616e+00 0.360 0.719
## trend -7.369e-03 5.766e-03 -1.278 0.204
## season2 -1.842e-01 2.373e+00 -0.078 0.938
## season3 -1.769e-01 2.373e+00 -0.075 0.941
## season4 -1.695e-01 2.372e+00 -0.071 0.943
## season5 -1.621e-01 2.372e+00 -0.068 0.946
## season6 -1.547e-01 2.372e+00 -0.065 0.948
## season7 -1.474e-01 2.371e+00 -0.062 0.951
## season8 -1.400e-01 2.371e+00 -0.059 0.953
## season9 -1.326e-01 2.371e+00 -0.056 0.955
## season10 -1.253e-01 2.371e+00 -0.053 0.958
## season11 -1.179e-01 2.370e+00 -0.050 0.960
## season12 -1.105e-01 2.370e+00 -0.047 0.963
## season13 -1.032e-01 2.370e+00 -0.044 0.965
## season14 -9.579e-02 2.370e+00 -0.040 0.968
## season15 1.291e+01 2.370e+00 5.449 3.43e-07 ***
## season16 -8.106e-02 2.370e+00 -0.034 0.973
## season17 -7.369e-02 2.369e+00 -0.031 0.975
## season18 -6.632e-02 2.369e+00 -0.028 0.978
## season19 -5.895e-02 2.369e+00 -0.025 0.980
## season20 -5.158e-02 2.369e+00 -0.022 0.983
## season21 -4.421e-02 2.369e+00 -0.019 0.985
## season22 2.965e-01 2.369e+00 0.125 0.901
## season23 -2.948e-02 2.369e+00 -0.012 0.990
## season24 -2.211e-02 2.369e+00 -0.009 0.993
## season25 -1.474e-02 2.369e+00 -0.006 0.995
## season26 -7.369e-03 2.369e+00 -0.003 0.998
## season27 1.865e-15 2.369e+00 0.000 1.000
## season28 7.369e-03 2.369e+00 0.003 0.998
## season29 1.474e-02 2.369e+00 0.006 0.995
## season30 2.211e-02 2.369e+00 0.009 0.993
## season31 2.948e-02 2.369e+00 0.012 0.990
## season32 3.684e-02 2.369e+00 0.016 0.988
## season33 4.421e-02 2.369e+00 0.019 0.985
## season34 3.849e-01 2.369e+00 0.162 0.871
## season35 5.895e-02 2.369e+00 0.025 0.980
## season36 6.632e-02 2.369e+00 0.028 0.978
## season37 7.369e-02 2.369e+00 0.031 0.975
## season38 8.106e-02 2.370e+00 0.034 0.973
## season39 8.843e-02 2.370e+00 0.037 0.970
## season40 9.579e-02 2.370e+00 0.040 0.968
## season41 1.032e-01 2.370e+00 0.044 0.965
## season42 1.105e-01 2.370e+00 0.047 0.963
## season43 1.179e-01 2.370e+00 0.050 0.960
## season44 1.253e-01 2.371e+00 0.053 0.958
## season45 4.660e-01 2.371e+00 0.197 0.845
## season46 1.400e-01 2.371e+00 0.059 0.953
## season47 1.474e-01 2.371e+00 0.062 0.951
## season48 1.547e-01 2.372e+00 0.065 0.948
## season49 1.621e-01 2.372e+00 0.068 0.946
## season50 1.695e-01 2.372e+00 0.071 0.943
## season51 1.769e-01 2.373e+00 0.075 0.941
## season52 1.842e-01 2.373e+00 0.078 0.938
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.101 on 104 degrees of freedom
## Multiple R-squared: 0.3388, Adjusted R-squared: 0.00815
## F-statistic: 1.025 on 52 and 104 DF, p-value: 0.4491
## Sub-meter 1 forecasts from the linear model, 20 periods ahead:
## one with the default interval levels, one with 80% and 90% intervals.
forecastfitSM1 <- forecast(fitSM1, h = 20)
forecastfitSM1c <- forecast(fitSM1, h = 20, level = c(80, 90))
## Default plot of the first forecast
plot(forecastfitSM1)
## Second forecast with a limited y range and axis labels
plot(
  forecastfitSM1c,
  ylim = c(0, 20),
  ylab = "Watt-Hours",
  xlab = "Time"
)
Decomposing Sub-meter 1 into trend, seasonal and remainder in order to understand the visualization better
## Split sub-meter 1 into trend, seasonal and random components
components070809SM1weekly <- decompose(tsSM1_070809weekly)
## Draw the decomposition panels
plot(components070809SM1weekly)
## Structure of the decomposition object
summary(components070809SM1weekly)
## Length Class Mode
## x 157 ts numeric
## seasonal 157 ts numeric
## trend 157 ts numeric
## random 157 ts numeric
## figure 52 -none- numeric
## type 1 -none- character
## Remove the seasonal component from sub-meter 1 by subtraction, then plot
## the seasonally adjusted series
tsSM1_070809Adjusted <-
  tsSM1_070809weekly - components070809SM1weekly[["seasonal"]]
autoplot(tsSM1_070809Adjusted)
Use Holt Winters Exponential Smoothing & Plot
## Holt-Winters smoothing of the seasonally adjusted sub-meter 1 series with
## the trend (beta) and seasonal (gamma) terms switched off, then plot the fit.
tsSM1_HW070809 <- HoltWinters(tsSM1_070809Adjusted, beta = FALSE, gamma = FALSE)
plot(tsSM1_HW070809, ylim = c(0, 25))
## HoltWinters forecast 25 periods ahead & plot
tsSM1_HW070809for <- forecast(tsSM1_HW070809, h = 25)
plot(tsSM1_HW070809for, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - Sub-meter 1")
## Forecast HoltWinters with diminished (10% and 25%) interval levels
tsSM1_HW070809forC <- forecast(tsSM1_HW070809, h = 25, level = c(10, 25))
## Plot only the forecasted area. The previous call passed an unnamed
## start(2010), which evaluates to c(1, 1) and is matched positionally to
## plot.forecast()'s `include` argument (number of historical values drawn).
## `include = 1` states that intent directly instead of relying on the accident.
plot(tsSM1_HW070809forC, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - Sub-meter 1", include = 1)
## Weekly time series for sub-meter 2: 52 observations per period,
## starting at period 1 of 2007.
tsSM2_070809weekly <- ts(
  house070809weekly[["Sub_metering_2"]],
  start = c(2007, 1),
  frequency = 52
)
## Labelled, coloured line chart of sub-meter 2 via autoplot
autoplot(
  tsSM2_070809weekly,
  ts.colour = 'red',
  xlab = "Time",
  ylab = "Watt Hours",
  main = "Laundry"
)
## The same series drawn with base plot.ts
plot.ts(tsSM2_070809weekly)
Apply time series linear regression to the sub-meter 2 ts object and use summary to obtain R2 and RMSE from the model you built
## Time series linear regression of sub-meter 2 on a linear trend plus
## seasonal dummies, followed by the standard model summary.
fitSM2 <- tslm(formula = tsSM2_070809weekly ~ trend + season)
summary(fitSM2)
##
## Call:
## tslm(formula = tsSM2_070809weekly ~ trend + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.7850 -0.3333 0.0000 0.2150 16.0000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.326564 1.447496 0.226 0.822
## trend -0.004134 0.005164 -0.800 0.425
## season2 -0.103343 2.125382 -0.049 0.961
## season3 -0.099209 2.125074 -0.047 0.963
## season4 0.238258 2.124780 0.112 0.911
## season5 -0.090942 2.124497 -0.043 0.966
## season6 -0.086808 2.124227 -0.041 0.967
## season7 -0.082674 2.123970 -0.039 0.969
## season8 0.254793 2.123725 0.120 0.905
## season9 -0.074407 2.123493 -0.035 0.972
## season10 21.929727 2.123273 10.328 <2e-16 ***
## season11 0.267194 2.123066 0.126 0.900
## season12 0.271328 2.122871 0.128 0.899
## season13 -0.057872 2.122689 -0.027 0.978
## season14 0.279595 2.122519 0.132 0.895
## season15 0.617062 2.122362 0.291 0.772
## season16 -0.045471 2.122218 -0.021 0.983
## season17 -0.041337 2.122086 -0.019 0.984
## season18 0.296130 2.121966 0.140 0.889
## season19 0.633597 2.121860 0.299 0.766
## season20 0.304397 2.121765 0.143 0.886
## season21 0.308531 2.121684 0.145 0.885
## season22 0.312665 2.121615 0.147 0.883
## season23 0.983465 2.121558 0.464 0.644
## season24 -0.012401 2.121514 -0.006 0.995
## season25 0.658399 2.121483 0.310 0.757
## season26 0.995866 2.121464 0.469 0.640
## season27 1.666667 2.121457 0.786 0.434
## season28 0.337467 2.121464 0.159 0.874
## season29 0.341601 2.121483 0.161 0.872
## season30 0.345734 2.121514 0.163 0.871
## season31 0.683202 2.121558 0.322 0.748
## season32 0.020669 2.121615 0.010 0.992
## season33 0.024802 2.121684 0.012 0.991
## season34 0.028936 2.121765 0.014 0.989
## season35 1.033070 2.121860 0.487 0.627
## season36 0.037203 2.121966 0.018 0.986
## season37 1.374671 2.122086 0.648 0.519
## season38 0.712138 2.122218 0.336 0.738
## season39 1.049605 2.122362 0.495 0.622
## season40 0.387072 2.122519 0.182 0.856
## season41 0.391205 2.122689 0.184 0.854
## season42 0.395339 2.122871 0.186 0.853
## season43 0.066139 2.123066 0.031 0.975
## season44 0.070273 2.123273 0.033 0.974
## season45 0.074407 2.123493 0.035 0.972
## season46 0.745207 2.123725 0.351 0.726
## season47 0.416008 2.123970 0.196 0.845
## season48 0.086808 2.124227 0.041 0.967
## season49 0.757608 2.124497 0.357 0.722
## season50 0.761742 2.124780 0.359 0.721
## season51 0.099209 2.125074 0.047 0.963
## season52 0.103343 2.125382 0.049 0.961
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.778 on 104 degrees of freedom
## Multiple R-squared: 0.6371, Adjusted R-squared: 0.4556
## F-statistic: 3.511 on 52 and 104 DF, p-value: 2.59e-08
## Sub-meter 2 forecasts from the linear model, 20 periods ahead:
## one with the default interval levels, one with 80% and 90% intervals.
forecastfitSM2 <- forecast(fitSM2, h = 20)
forecastfitSM2c <- forecast(fitSM2, h = 20, level = c(80, 90))
## Default plot of the first forecast
plot(forecastfitSM2)
## Second forecast with a limited y range and axis labels
plot(
  forecastfitSM2c,
  ylim = c(0, 20),
  ylab = "Watt-Hours",
  xlab = "Time"
)
Decomposing Sub-meter 2 into trend, seasonal and remainder in order to understand the visualization better
## Split sub-meter 2 into trend, seasonal and random components
components070809SM2weekly <- decompose(tsSM2_070809weekly)
## Draw the decomposition panels
plot(components070809SM2weekly)
## Structure of the decomposition object
summary(components070809SM2weekly)
## Length Class Mode
## x 157 ts numeric
## seasonal 157 ts numeric
## trend 157 ts numeric
## random 157 ts numeric
## figure 52 -none- numeric
## type 1 -none- character
## Remove the seasonal component from sub-meter 2 by subtraction, then plot
## the seasonally adjusted series
tsSM2_070809Adjusted <-
  tsSM2_070809weekly - components070809SM2weekly[["seasonal"]]
autoplot(tsSM2_070809Adjusted)
Use Holt Winters Exponential Smoothing & Plot
## Holt-Winters smoothing of the seasonally adjusted sub-meter 2 series with
## the trend (beta) and seasonal (gamma) terms switched off, then plot the fit.
tsSM2_HW070809 <- HoltWinters(tsSM2_070809Adjusted, beta = FALSE, gamma = FALSE)
plot(tsSM2_HW070809, ylim = c(0, 25))
## HoltWinters forecast 25 periods ahead & plot
tsSM2_HW070809for <- forecast(tsSM2_HW070809, h = 25)
plot(tsSM2_HW070809for, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - Sub-meter 2")
## Forecast HoltWinters with diminished (10% and 25%) interval levels
tsSM2_HW070809forC <- forecast(tsSM2_HW070809, h = 25, level = c(10, 25))
## Plot only the forecasted area. The previous call passed an unnamed
## start(2010), which evaluates to c(1, 1) and is matched positionally to
## plot.forecast()'s `include` argument (number of historical values drawn).
## `include = 1` states that intent directly instead of relying on the accident.
plot(tsSM2_HW070809forC, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - Sub-meter 2", include = 1)
## Weekly time series for sub-meter 3: 52 observations per period,
## starting at period 1 of 2007.
tsSM3_070809weekly <- ts(
  house070809weekly[["Sub_metering_3"]],
  start = c(2007, 1),
  frequency = 52
)
## Labelled, coloured line chart of sub-meter 3 via autoplot
autoplot(
  tsSM3_070809weekly,
  ts.colour = 'red',
  xlab = "Time",
  ylab = "Watt Hours",
  main = "HeaterAC"
)
## The same series drawn with base plot.ts
plot.ts(tsSM3_070809weekly)
Apply time series linear regression to the sub-meter 3 ts object and use summary to obtain R2 and RMSE from the model you built
## Time series linear regression of sub-meter 3 on a linear trend plus
## seasonal dummies, followed by the standard model summary.
fitSM3 <- tslm(formula = tsSM3_070809weekly ~ trend + season)
summary(fitSM3)
##
## Call:
## tslm(formula = tsSM3_070809weekly ~ trend + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.7336 -2.4003 -0.3333 1.7336 20.0000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.88381 3.23073 3.678 0.000374 ***
## trend -0.03334 0.01153 -2.893 0.004654 **
## season2 -4.08348 4.74373 -0.861 0.391319
## season3 -4.38348 4.74305 -0.924 0.357527
## season4 -10.01680 4.74239 -2.112 0.037065 *
## season5 -4.31680 4.74176 -0.910 0.364728
## season6 -9.95013 4.74115 -2.099 0.038267 *
## season7 -9.91679 4.74058 -2.092 0.038883 *
## season8 -4.21678 4.74003 -0.890 0.375729
## season9 -9.85011 4.73951 -2.078 0.040144 *
## season10 -9.81677 4.73902 -2.071 0.040789 *
## season11 -3.78343 4.73856 -0.798 0.426438
## season12 -3.75009 4.73813 -0.791 0.430471
## season13 -9.71675 4.73772 -2.051 0.042787 *
## season14 -9.68341 4.73734 -2.044 0.043475 *
## season15 -9.31674 4.73699 -1.967 0.051870 .
## season16 -8.95007 4.73667 -1.890 0.061606 .
## season17 -9.25006 4.73637 -1.953 0.053509 .
## season18 -9.21672 4.73611 -1.946 0.054347 .
## season19 -9.18338 4.73587 -1.939 0.055197 .
## season20 -3.15004 4.73566 -0.665 0.507411
## season21 -9.11670 4.73548 -1.925 0.056937 .
## season22 -8.75003 4.73532 -1.848 0.067470 .
## season23 -3.38336 4.73520 -0.715 0.476511
## season24 -9.35002 4.73510 -1.975 0.050963 .
## season25 -8.98335 4.73503 -1.897 0.060574 .
## season26 -8.61667 4.73499 -1.820 0.071668 .
## season27 -9.25000 4.73497 -1.954 0.053441 .
## season28 -8.88333 4.73499 -1.876 0.063445 .
## season29 -3.18332 4.73503 -0.672 0.502889
## season30 0.85002 4.73510 0.180 0.857883
## season31 -8.44998 4.73520 -1.785 0.077258 .
## season32 -8.41664 4.73532 -1.777 0.078423 .
## season33 -3.04996 4.73548 -0.644 0.520950
## season34 -2.68329 4.73566 -0.567 0.572197
## season35 -8.31662 4.73587 -1.756 0.082017 .
## season36 -8.61661 4.73611 -1.819 0.071736 .
## season37 -2.91661 4.73637 -0.616 0.539379
## season38 -8.21660 4.73667 -1.735 0.085760 .
## season39 -2.84993 4.73699 -0.602 0.548728
## season40 -8.48326 4.73734 -1.791 0.076247 .
## season41 -8.11658 4.73772 -1.713 0.089658 .
## season42 -8.41658 4.73813 -1.776 0.078599 .
## season43 -2.04990 4.73856 -0.433 0.666201
## season44 -8.34990 4.73902 -1.762 0.081017 .
## season45 -8.64989 4.73951 -1.825 0.070862 .
## season46 -2.61655 4.74003 -0.552 0.582125
## season47 -8.24988 4.74058 -1.740 0.084770 .
## season48 -8.21654 4.74115 -1.733 0.086055 .
## season49 -8.18320 4.74176 -1.726 0.087358 .
## season50 -8.14986 4.74239 -1.719 0.088678 .
## season51 -2.44986 4.74305 -0.517 0.606591
## season52 -2.74985 4.74373 -0.580 0.563384
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.2 on 104 degrees of freedom
## Multiple R-squared: 0.308, Adjusted R-squared: -0.03797
## F-statistic: 0.8903 on 52 and 104 DF, p-value: 0.6745
## Sub-meter 3 forecasts from the linear model, 20 periods ahead:
## one with the default interval levels, one with 80% and 90% intervals.
forecastfitSM3 <- forecast(fitSM3, h = 20)
forecastfitSM3c <- forecast(fitSM3, h = 20, level = c(80, 90))
## Default plot of the first forecast
plot(forecastfitSM3)
## Second forecast with a limited y range and axis labels
plot(
  forecastfitSM3c,
  ylim = c(0, 20),
  ylab = "Watt-Hours",
  xlab = "Time"
)
Decomposing sub-meter 3 into trend, seasonal and remainder in order to understand the visualization better
## Split sub-meter 3 into trend, seasonal and random components
components070809SM3weekly <- decompose(tsSM3_070809weekly)
## Draw the decomposition panels
plot(components070809SM3weekly)
## Structure of the decomposition object
summary(components070809SM3weekly)
## Length Class Mode
## x 157 ts numeric
## seasonal 157 ts numeric
## trend 157 ts numeric
## random 157 ts numeric
## figure 52 -none- numeric
## type 1 -none- character
## Remove the seasonal component from sub-meter 3 by subtraction, then plot
## the seasonally adjusted series
tsSM3_070809Adjusted <-
  tsSM3_070809weekly - components070809SM3weekly[["seasonal"]]
autoplot(tsSM3_070809Adjusted)
Use Holt Winters Exponential Smoothing & Plot
## Holt-Winters smoothing of the seasonally adjusted sub-meter 3 series with
## the trend (beta) and seasonal (gamma) terms switched off, then plot the fit.
tsSM3_HW070809 <- HoltWinters(tsSM3_070809Adjusted, beta = FALSE, gamma = FALSE)
plot(tsSM3_HW070809, ylim = c(0, 25))
## HoltWinters forecast 25 periods ahead & plot
tsSM3_HW070809for <- forecast(tsSM3_HW070809, h = 25)
plot(tsSM3_HW070809for, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - sub-meter 3")
## Forecast HoltWinters with diminished (10% and 25%) interval levels
tsSM3_HW070809forC <- forecast(tsSM3_HW070809, h = 25, level = c(10, 25))
## Plot only the forecasted area. The previous call passed an unnamed
## start(2010), which evaluates to c(1, 1) and is matched positionally to
## plot.forecast()'s `include` argument (number of historical values drawn).
## `include = 1` states that intent directly instead of relying on the accident.
plot(tsSM3_HW070809forC, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - sub-meter 3", include = 1)
## Weekly time series for sub-meter 0: 52 observations per period,
## starting at period 1 of 2007.
tsSM0_070809weekly <- ts(
  house070809weekly[["Sub_metering_0"]],
  start = c(2007, 1),
  frequency = 52
)
## Labelled, coloured line chart of sub-meter 0 via autoplot
autoplot(
  tsSM0_070809weekly,
  ts.colour = 'red',
  xlab = "Time",
  ylab = "Watt Hours",
  main = "Other Appliances"
)
## The same series drawn with base plot.ts
plot.ts(tsSM0_070809weekly)
Apply time series linear regression to the sub-meter 0 ts object and use summary to obtain R2 and RMSE from the model you built
## Time series linear regression of sub-meter 0 on a linear trend plus
## seasonal dummies, followed by the standard model summary.
fitSM0 <- tslm(formula = tsSM0_070809weekly ~ trend + season)
summary(fitSM0)
##
## Call:
## tslm(formula = tsSM0_070809weekly ~ trend + season)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.178 -1.713 -0.198 1.476 27.142
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.407057 4.079816 5.002 2.32e-06 ***
## trend -0.004098 0.014555 -0.282 0.77885
## season2 -6.985778 5.990460 -1.166 0.24622
## season3 -2.481680 5.989593 -0.414 0.67948
## season4 -3.644248 5.988762 -0.609 0.54417
## season5 -9.151261 5.987966 -1.528 0.12948
## season6 -11.036052 5.987206 -1.843 0.06814 .
## season7 -9.854176 5.986480 -1.646 0.10277
## season8 -3.627857 5.985790 -0.606 0.54578
## season9 -13.857091 5.985135 -2.315 0.02256 *
## season10 -11.564106 5.984516 -1.932 0.05604 .
## season11 -14.104452 5.983932 -2.357 0.02029 *
## season12 -15.333688 5.983383 -2.563 0.01182 *
## season13 -11.651812 5.982870 -1.948 0.05417 .
## season14 -14.492159 5.982392 -2.422 0.01714 *
## season15 -16.132506 5.981949 -2.697 0.00817 **
## season16 -14.383964 5.981542 -2.405 0.01795 *
## season17 -15.890977 5.981170 -2.657 0.00913 **
## season18 -16.097990 5.980833 -2.692 0.00829 **
## season19 -16.282781 5.980532 -2.723 0.00760 **
## season20 -13.434239 5.980267 -2.246 0.02679 *
## season21 -15.007919 5.980036 -2.510 0.01363 *
## season22 -12.937155 5.979842 -2.163 0.03280 *
## season23 -13.655279 5.979682 -2.284 0.02443 *
## season24 -15.462292 5.979558 -2.586 0.01110 *
## season25 -18.402639 5.979470 -3.078 0.00267 **
## season26 -18.554097 5.979416 -3.103 0.00247 **
## season27 -14.949999 5.979399 -2.500 0.01397 *
## season28 -16.523679 5.979416 -2.763 0.00677 **
## season29 -14.175137 5.979470 -2.371 0.01960 *
## season30 -15.793261 5.979558 -2.641 0.00953 **
## season31 -14.911386 5.979682 -2.494 0.01422 *
## season32 -16.962843 5.979842 -2.837 0.00548 **
## season33 -16.669857 5.980036 -2.788 0.00631 **
## season34 -16.521315 5.980267 -2.763 0.00678 **
## season35 -18.050550 5.980532 -3.018 0.00320 **
## season36 -15.768675 5.980833 -2.637 0.00966 **
## season37 -17.953466 5.981170 -3.002 0.00336 **
## season38 -13.460479 5.981542 -2.250 0.02653 *
## season39 -15.434159 5.981949 -2.580 0.01127 *
## season40 -14.696728 5.982392 -2.457 0.01568 *
## season41 -15.148186 5.982870 -2.532 0.01284 *
## season42 -14.444088 5.983383 -2.414 0.01752 *
## season43 -14.839990 5.983932 -2.480 0.01474 *
## season44 -14.591448 5.984516 -2.438 0.01646 *
## season45 2.212649 5.985135 0.370 0.71236
## season46 6.227858 5.985790 1.040 0.30055
## season47 -6.212488 5.986480 -1.038 0.30179
## season48 -4.986168 5.987206 -0.833 0.40686
## season49 1.962375 5.987966 0.328 0.74378
## season50 2.588694 5.988762 0.432 0.66645
## season51 15.381681 5.989593 2.568 0.01165 *
## season52 7.063558 5.990460 1.179 0.24104
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.829 on 104 degrees of freedom
## Multiple R-squared: 0.585, Adjusted R-squared: 0.3775
## F-statistic: 2.82 on 52 and 104 DF, p-value: 3.568e-06
## Sub-meter 0 forecasts from the linear model, 20 periods ahead:
## one with the default interval levels, one with 80% and 90% intervals.
forecastfitSM0 <- forecast(fitSM0, h = 20)
forecastfitSM0c <- forecast(fitSM0, h = 20, level = c(80, 90))
## Default plot of the first forecast
plot(forecastfitSM0)
## Second forecast with a limited y range and axis labels
plot(
  forecastfitSM0c,
  ylim = c(0, 20),
  ylab = "Watt-Hours",
  xlab = "Time"
)
Decomposing sub-meter 0 into trend, seasonal and remainder in order to understand the visualization better
## Split sub-meter 0 into trend, seasonal and random components
components070809SM0weekly <- decompose(tsSM0_070809weekly)
## Draw the decomposition panels
plot(components070809SM0weekly)
## Structure of the decomposition object
summary(components070809SM0weekly)
## Length Class Mode
## x 157 ts numeric
## seasonal 157 ts numeric
## trend 157 ts numeric
## random 157 ts numeric
## figure 52 -none- numeric
## type 1 -none- character
## Remove the seasonal component from sub-meter 0 by subtraction, then plot
## the seasonally adjusted series
tsSM0_070809Adjusted <-
  tsSM0_070809weekly - components070809SM0weekly[["seasonal"]]
autoplot(tsSM0_070809Adjusted)
Use Holt Winters Exponential Smoothing & Plot
## Holt-Winters smoothing of the seasonally adjusted sub-meter 0 series with
## the trend (beta) and seasonal (gamma) terms switched off, then plot the fit.
tsSM0_HW070809 <- HoltWinters(tsSM0_070809Adjusted, beta = FALSE, gamma = FALSE)
plot(tsSM0_HW070809, ylim = c(0, 25))
## HoltWinters forecast 25 periods ahead & plot
tsSM0_HW070809for <- forecast(tsSM0_HW070809, h = 25)
plot(tsSM0_HW070809for, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - sub-meter 0")
## Forecast HoltWinters with diminished (10% and 25%) interval levels
tsSM0_HW070809forC <- forecast(tsSM0_HW070809, h = 25, level = c(10, 25))
## Plot only the forecasted area. The previous call passed an unnamed
## start(2010), which evaluates to c(1, 1) and is matched positionally to
## plot.forecast()'s `include` argument (number of historical values drawn).
## `include = 1` states that intent directly instead of relying on the accident.
plot(tsSM0_HW070809forC, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - sub-meter 0", include = 1)
## Table of model fit statistics (multiple R-squared and overall F-test
## p-value) for the sub-meter 1-3 linear models.
## The values are read from the fitted models instead of being typed in by
## hand: the previous hard-coded Kitchen and Laundry columns were identical
## and did not match the summary() output of fitSM1 / fitSM2.

# Return c(R-squared, F-test p-value) for a fitted linear model, rounded to
# four significant digits.
.fit_metrics <- function(fit) {
  fit_summary <- summary(fit)
  f_stat <- fit_summary$fstatistic
  p_value <- pf(
    f_stat[["value"]], f_stat[["numdf"]], f_stat[["dendf"]],
    lower.tail = FALSE
  )
  signif(c(fit_summary$r.squared, p_value), 4)
}

Model <- c('R-Squared', 'p-value')
SM_Kitchen <- .fit_metrics(fitSM1)
SM_Laundry <- .fit_metrics(fitSM2)
SM_HeatAC <- .fit_metrics(fitSM3)
metrics <- data.frame(Model, SM_Kitchen, SM_Laundry, SM_HeatAC)
kable(metrics) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    fixed_thead = TRUE
  )
| Model | SM_Kitchen | SM_Laundry | SM_HeatAC |
|---|---|---|---|
| R-Squared | 0.3388 | 0.6371 | 0.3080 |
| p-value | 0.4491 | 2.59e-08 | 0.6745 |