Energy Consumption Analysis of a Smart Home

Business Question

Explore some visualizations of the data and then build predictive models that will demonstrate to the client (home builder) how the data can be used to help a home owner make a decision about altering power consumption. To provide immediate value, we need to give the client five business suggestions based on the insights we gleaned from the analysis.

Visualizations

We created some exploratory visualizations without a specific goal, simply to understand the data better. It is a good way to explore what can be done with the data and what limitations we may face.

One day line graph visualization of all submeters

Plotly

# Build the single-day dataset: keep only the readings whose DateTime falls
# on 11 February 2008
Winter_day <- filter(
  EnergyConsumption,
  year(DateTime) == 2008,
  month(DateTime) == 2,
  day(DateTime) == 11
)

# Interactive plotly line chart of the four sub-meter columns of Winter_day,
# one named trace per column.
# Winter_day holds a single date, so the x axis shows the time of day and is
# titled "Time" (it was previously titled "Day").
plot_ly(Winter_day, x = ~DateTime, y = ~Sub_metering_1, name = "Kitchen",
        type = "scatter", mode = "lines") %>%
  add_trace(y = ~Sub_metering_2, name = "Laundry Room", mode = "lines") %>%
  add_trace(y = ~Sub_metering_3, name = "Water Heater & AC", mode = "lines") %>%
  add_trace(y = ~Sub_metering_0, name = "Other Appliances", mode = "lines") %>%
  layout(title = "Power Consumption in a 2008 Winter Monday (11th Feb, 08)",
         xaxis = list(title = "Time"),
         yaxis = list(title = "Power (watt-hours)"))

ggplot

#Visualizations --------------------------
# Static ggplot line chart of the four sub-meter columns of Winter_day, one
# colour per series.
# The y axis is labelled in watt-hours so that it agrees with the plotly and
# time-series charts of the same columns in this report (it previously read
# "kWh").
ggplot(Winter_day) +
  geom_line(aes(x = DateTime, y = Sub_metering_1, color = "Submeter 1")) +
  geom_line(aes(x = DateTime, y = Sub_metering_2, color = "Submeter 2")) +
  geom_line(aes(x = DateTime, y = Sub_metering_3, color = "Submeter 3")) +
  geom_line(aes(x = DateTime, y = Sub_metering_0, color = "Submeter 0")) +
  scale_colour_brewer(palette = "Set1") +
  theme_minimal() +
  labs(x = "DateTime", y = "Watt-hours", color = "Legend",
       title = "Energy Consumption in a 2008 Winter Monday (11th Feb, 08)")

###

Decrease granularity by plotting readings at 10 min interval

## Subset 11 February 2008 at a 10-minute frequency: keep only the readings
## taken at minutes 0, 10, 20, 30, 40 and 50 of each hour
houseDay10 <- filter(dfFullYr, Year == 2008 & Month == 2 & Day == 11 &
                       minute(DateTime) %% 10 == 0)
                       
## Plot all four sub-meter series with title, legend and labels - 10 Minute
## frequency. The title names 11 February 2008, the date houseDay10 is
## filtered to.
plot_ly(houseDay10, x = ~DateTime, y = ~Sub_metering_1, name = "Kitchen",
        type = "scatter", mode = "lines") %>%
  add_trace(y = ~Sub_metering_2, name = "Laundry Room", mode = "lines") %>%
  add_trace(y = ~Sub_metering_3, name = "Water Heater & AC", mode = "lines") %>%
  add_trace(y = ~Sub_metering_0, name = "Other Appliances", mode = "lines") %>%
  layout(title = "Power Consumption February 11th, 2008",
         xaxis = list(title = "Time"),
         yaxis = list(title = "Power (watt-hours)"))

A line graph visualization of all submeters for one week

A visualization showing the energy consumption over a week in Autumn of 2008

Plotly

## Subset one autumn week of 2008: 10th to 16th November, the week named in
## the title of the chart drawn from week08.
## NOTE(review): assumes Day holds the numeric day of the month - confirm
## against the code that builds dfFullYr.
week08 <- filter(dfFullYr, Year == 2008 & Month == 11 & Day >= 10 & Day <= 16)

## One-week plotly line chart: a named trace for each of the four sub-meter
## columns of week08, plus chart title and axis titles
plot_ly(
  week08,
  x = ~DateTime,
  y = ~Sub_metering_1,
  name = "Kitchen",
  type = "scatter",
  mode = "lines"
) %>%
  add_trace(y = ~Sub_metering_2, name = "Laundry Room", mode = "lines") %>%
  add_trace(y = ~Sub_metering_3, name = "Water Heater & AC", mode = "lines") %>%
  add_trace(y = ~Sub_metering_0, name = "Other Appliances", mode = "lines") %>%
  layout(
    title = "Power Consumption Nov 10th - 16th, 2008",
    xaxis = list(title = "Day"),
    yaxis = list(title = "Power (watt-hours)")
  )

ggplot2

# Energy consumption by meter for one autumn week: 10th to 16th November 2008.
# The upper bound is the start of the 17th (exclusive) so that every reading
# on the 16th is kept, and >= keeps a reading stamped exactly at the lower
# bound.
Autumn_week <- EnergyConsumption %>%
  filter(DateTime >= "2008-11-10" & DateTime < "2008-11-17")

# Static ggplot line chart of the four sub-meter columns of Autumn_week, one
# colour per room/appliance group.
# The y axis is labelled in watt-hours so that it agrees with the plotly and
# time-series charts of the same columns in this report (it previously read
# "kWh").
ggplot(Autumn_week) +
  geom_line(aes(x = DateTime, y = Sub_metering_1, color = "Kitchen")) +
  geom_line(aes(x = DateTime, y = Sub_metering_2, color = "Laundry Room")) +
  geom_line(aes(x = DateTime, y = Sub_metering_3, color = "Water Heater & AC")) +
  geom_line(aes(x = DateTime, y = Sub_metering_0, color = "Other Appliances")) +
  scale_colour_brewer(palette = "Set1") +
  theme_minimal() +
  labs(x = "DateTime", y = "Watt-hours", color = "Legend",
       title = "Power Consumption Nov 10th - 16th, 2008")

A line graph visualization of all submeters during Spring of 2008

A visualization showing the energy consumption in Spring of 2008

# Energy consumption in spring: keep the readings strictly between
# "2008-03-21" and "2008-06-20"
Spring_Period <- filter(
  EnergyConsumption,
  DateTime > "2008-03-21",
  DateTime < "2008-06-20"
)

# Static ggplot line chart of the four sub-meter columns of Spring_Period, all
# drawn on one panel with one colour per series.
# The y axis is labelled in watt-hours so that it agrees with the plotly and
# time-series charts of the same columns in this report (it previously read
# "kWh").
ggplot(Spring_Period) +
  geom_line(aes(x = DateTime, y = Sub_metering_1, color = "Submeter 1")) +
  geom_line(aes(x = DateTime, y = Sub_metering_2, color = "Submeter 2")) +
  geom_line(aes(x = DateTime, y = Sub_metering_3, color = "Submeter 3")) +
  geom_line(aes(x = DateTime, y = Sub_metering_0, color = "Submeter 0")) +
  theme_minimal() +
  scale_colour_brewer(palette = "Set1") +
  labs(x = "DateTime", y = "Watt-hours", color = "Legend",
       title = "Energy Consumption in 2008 Spring")

Since the plot has a lot of overlapping lines and cannot be interpreted this way, we will split out each submeter and plot it on its own to better visualize the data.

# Rebuild Spring_Period in long format, one row per timestamp and sub-meter:
#   1. keep the same spring date window as before
#   2. group by DateTime plus its hour, day and month, and sum each sub-meter
#      column within a group
#   3. stack the four summed columns into a Submeters (name) / value pair
spring_rows <- filter(
  EnergyConsumption,
  DateTime > "2008-03-21",
  DateTime < "2008-06-20"
)

spring_sums <- spring_rows %>%
  group_by(
    DateTime,
    HOUR = hour(DateTime),
    DAY = day(DateTime),
    MONTH = month(DateTime)
  ) %>%
  summarise(
    SM_Others = sum(Sub_metering_0),
    SM_Kitchen = sum(Sub_metering_1),
    SM_Laundry = sum(Sub_metering_2),
    SM_HeatAC = sum(Sub_metering_3)
  ) %>%
  ungroup()

Spring_Period <- gather(
  spring_sums,
  key = "Submeters",
  value = "value",
  SM_Others, SM_Kitchen, SM_Laundry, SM_HeatAC
)

# Faceted ggplot line chart: one panel (row) per value of Submeters, so the
# series no longer overlap. The legend is hidden because the facet strips
# already name each series.
# The title now names the spring period held in Spring_Period (it previously
# said "autumn week"), and the y axis is labelled in watt-hours to agree with
# the plotly and time-series charts in this report (it previously read "kWh").
ggplot(Spring_Period) +
  geom_line(aes(x = DateTime, y = value, color = Submeters)) +
  facet_grid(Submeters ~ .) +
  scale_colour_brewer(palette = "Set1") +
  labs(x = "DateTime", y = "Watt-hours", color = "Legend",
       title = "Energy Consumption by Submeter in 2008 Spring") +
  theme(legend.position = "none")

Time series plots for each submeter

We will subset to one observation per week, taken on Mondays at 8:01 PM (hour 20, minute 1), across all 3 years; then autoplot the time series.

Kitchen

## Weekly subset for 2007, 2008 and 2009: keep the rows that fall on a Monday,
## in hour 20, at minute 1 of that hour
house070809weekly <- dfFullYr %>%
  filter(
    weekDay == "Monday",
    Hour == 20,
    minute(DateTime) == 1
  )

## Weekly time series of sub-meter 1: 52 observations per cycle, with the
## first observation placed at period 1 of 2007
tsSM1_070809weekly <- ts(
  house070809weekly$Sub_metering_1,
  start = c(2007, 1),
  frequency = 52
)

## Draw the series with autoplot: red line, axis labels and a title
autoplot(
  tsSM1_070809weekly,
  ts.colour = "red",
  xlab = "Time",
  ylab = "Watt Hours",
  main = "Kitchen"
)

## Draw the same series with plot.ts
plot.ts(tsSM1_070809weekly)

Apply time series linear regression to the sub-meter 1 ts object and use summary to obtain R2 and RMSE from the model you built

## Regress the sub-meter 1 series on a trend term plus one seasonal term per
## period of the cycle, then print the model summary
fitSM1 <- tslm(formula = tsSM1_070809weekly ~ trend + season)
summary(fitSM1)
## 
## Call:
## tslm(formula = tsSM1_070809weekly ~ trend + season)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.0000  -0.3832   0.0000   0.3832  25.6168 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.821e-01  1.616e+00   0.360    0.719    
## trend       -7.369e-03  5.766e-03  -1.278    0.204    
## season2     -1.842e-01  2.373e+00  -0.078    0.938    
## season3     -1.769e-01  2.373e+00  -0.075    0.941    
## season4     -1.695e-01  2.372e+00  -0.071    0.943    
## season5     -1.621e-01  2.372e+00  -0.068    0.946    
## season6     -1.547e-01  2.372e+00  -0.065    0.948    
## season7     -1.474e-01  2.371e+00  -0.062    0.951    
## season8     -1.400e-01  2.371e+00  -0.059    0.953    
## season9     -1.326e-01  2.371e+00  -0.056    0.955    
## season10    -1.253e-01  2.371e+00  -0.053    0.958    
## season11    -1.179e-01  2.370e+00  -0.050    0.960    
## season12    -1.105e-01  2.370e+00  -0.047    0.963    
## season13    -1.032e-01  2.370e+00  -0.044    0.965    
## season14    -9.579e-02  2.370e+00  -0.040    0.968    
## season15     1.291e+01  2.370e+00   5.449 3.43e-07 ***
## season16    -8.106e-02  2.370e+00  -0.034    0.973    
## season17    -7.369e-02  2.369e+00  -0.031    0.975    
## season18    -6.632e-02  2.369e+00  -0.028    0.978    
## season19    -5.895e-02  2.369e+00  -0.025    0.980    
## season20    -5.158e-02  2.369e+00  -0.022    0.983    
## season21    -4.421e-02  2.369e+00  -0.019    0.985    
## season22     2.965e-01  2.369e+00   0.125    0.901    
## season23    -2.948e-02  2.369e+00  -0.012    0.990    
## season24    -2.211e-02  2.369e+00  -0.009    0.993    
## season25    -1.474e-02  2.369e+00  -0.006    0.995    
## season26    -7.369e-03  2.369e+00  -0.003    0.998    
## season27     1.865e-15  2.369e+00   0.000    1.000    
## season28     7.369e-03  2.369e+00   0.003    0.998    
## season29     1.474e-02  2.369e+00   0.006    0.995    
## season30     2.211e-02  2.369e+00   0.009    0.993    
## season31     2.948e-02  2.369e+00   0.012    0.990    
## season32     3.684e-02  2.369e+00   0.016    0.988    
## season33     4.421e-02  2.369e+00   0.019    0.985    
## season34     3.849e-01  2.369e+00   0.162    0.871    
## season35     5.895e-02  2.369e+00   0.025    0.980    
## season36     6.632e-02  2.369e+00   0.028    0.978    
## season37     7.369e-02  2.369e+00   0.031    0.975    
## season38     8.106e-02  2.370e+00   0.034    0.973    
## season39     8.843e-02  2.370e+00   0.037    0.970    
## season40     9.579e-02  2.370e+00   0.040    0.968    
## season41     1.032e-01  2.370e+00   0.044    0.965    
## season42     1.105e-01  2.370e+00   0.047    0.963    
## season43     1.179e-01  2.370e+00   0.050    0.960    
## season44     1.253e-01  2.371e+00   0.053    0.958    
## season45     4.660e-01  2.371e+00   0.197    0.845    
## season46     1.400e-01  2.371e+00   0.059    0.953    
## season47     1.474e-01  2.371e+00   0.062    0.951    
## season48     1.547e-01  2.372e+00   0.065    0.948    
## season49     1.621e-01  2.372e+00   0.068    0.946    
## season50     1.695e-01  2.372e+00   0.071    0.943    
## season51     1.769e-01  2.373e+00   0.075    0.941    
## season52     1.842e-01  2.373e+00   0.078    0.938    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.101 on 104 degrees of freedom
## Multiple R-squared:  0.3388, Adjusted R-squared:  0.00815 
## F-statistic: 1.025 on 52 and 104 DF,  p-value: 0.4491
## Forecast sub-meter 1 twenty periods ahead from the linear model and plot
## the result
forecastfitSM1 <- forecast(fitSM1, h = 20)
plot(forecastfitSM1)

## Same forecast, this time requesting the 80 and 90 levels
forecastfitSM1c <- forecast(fitSM1, h = 20, level = c(80, 90))

## Plot it with the y axis limited to 0-20 and both axes labelled
plot(
  forecastfitSM1c,
  xlab = "Time",
  ylab = "Watt-Hours",
  ylim = c(0, 20)
)

Decomposing Sub-meter 1 into trend, seasonal and remainder in order to understand the visualization better

## Split the sub-meter 1 series into its trend, seasonal and random parts
components070809SM1weekly <- decompose(x = tsSM1_070809weekly)

## Plot the decomposition
plot(components070809SM1weekly)

## List the elements of the decomposition (length, class and mode of each)
summary(components070809SM1weekly)
##          Length Class  Mode     
## x        157    ts     numeric  
## seasonal 157    ts     numeric  
## trend    157    ts     numeric  
## random   157    ts     numeric  
## figure    52    -none- numeric  
## type       1    -none- character
## Seasonally adjust sub-meter 1: subtract the seasonal component of the
## decomposition from the original series, then plot the adjusted series
tsSM1_070809Adjusted <-
  tsSM1_070809weekly - components070809SM1weekly[["seasonal"]]
autoplot(tsSM1_070809Adjusted)

Use Holt Winters Exponential Smoothing & Plot

## Holt Winters Exponential Smoothing of the seasonally adjusted series
## (beta and gamma both set to FALSE) & plot
tsSM1_HW070809 <- HoltWinters(tsSM1_070809Adjusted, beta = FALSE, gamma = FALSE)
plot(tsSM1_HW070809, ylim = c(0, 25))

## HoltWinters forecast 25 periods ahead & plot
tsSM1_HW070809for <- forecast(tsSM1_HW070809, h = 25)
plot(tsSM1_HW070809for, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - Sub-meter 1")

## Forecast HoltWinters with diminished confidence levels (10 and 25)
tsSM1_HW070809forC <- forecast(tsSM1_HW070809, h = 25, level = c(10, 25))

## Plot only the forecasted area. `include` is the number of observed values
## drawn ahead of the forecast, so 1 leaves essentially just the forecast.
## It replaces an unnamed `start(2010)` argument, which did not set a start
## year: it evaluated to c(1, 1) and was matched positionally to `include`.
plot(tsSM1_HW070809forC, include = 1, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - Sub-meter 1")

Laundry

## Weekly time series of sub-meter 2: 52 observations per cycle, with the
## first observation placed at period 1 of 2007
tsSM2_070809weekly <- ts(
  house070809weekly$Sub_metering_2,
  start = c(2007, 1),
  frequency = 52
)

## Draw the series with autoplot: red line, axis labels and a title
autoplot(
  tsSM2_070809weekly,
  ts.colour = "red",
  xlab = "Time",
  ylab = "Watt Hours",
  main = "Laundry"
)

## Draw the same series with plot.ts
plot.ts(tsSM2_070809weekly)

Apply time series linear regression to the sub-meter 2 ts object and use summary to obtain R2 and RMSE from the model you built

## Regress the sub-meter 2 series on a trend term plus one seasonal term per
## period of the cycle, then print the model summary
fitSM2 <- tslm(formula = tsSM2_070809weekly ~ trend + season)
summary(fitSM2)
## 
## Call:
## tslm(formula = tsSM2_070809weekly ~ trend + season)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.7850  -0.3333   0.0000   0.2150  16.0000 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.326564   1.447496   0.226    0.822    
## trend       -0.004134   0.005164  -0.800    0.425    
## season2     -0.103343   2.125382  -0.049    0.961    
## season3     -0.099209   2.125074  -0.047    0.963    
## season4      0.238258   2.124780   0.112    0.911    
## season5     -0.090942   2.124497  -0.043    0.966    
## season6     -0.086808   2.124227  -0.041    0.967    
## season7     -0.082674   2.123970  -0.039    0.969    
## season8      0.254793   2.123725   0.120    0.905    
## season9     -0.074407   2.123493  -0.035    0.972    
## season10    21.929727   2.123273  10.328   <2e-16 ***
## season11     0.267194   2.123066   0.126    0.900    
## season12     0.271328   2.122871   0.128    0.899    
## season13    -0.057872   2.122689  -0.027    0.978    
## season14     0.279595   2.122519   0.132    0.895    
## season15     0.617062   2.122362   0.291    0.772    
## season16    -0.045471   2.122218  -0.021    0.983    
## season17    -0.041337   2.122086  -0.019    0.984    
## season18     0.296130   2.121966   0.140    0.889    
## season19     0.633597   2.121860   0.299    0.766    
## season20     0.304397   2.121765   0.143    0.886    
## season21     0.308531   2.121684   0.145    0.885    
## season22     0.312665   2.121615   0.147    0.883    
## season23     0.983465   2.121558   0.464    0.644    
## season24    -0.012401   2.121514  -0.006    0.995    
## season25     0.658399   2.121483   0.310    0.757    
## season26     0.995866   2.121464   0.469    0.640    
## season27     1.666667   2.121457   0.786    0.434    
## season28     0.337467   2.121464   0.159    0.874    
## season29     0.341601   2.121483   0.161    0.872    
## season30     0.345734   2.121514   0.163    0.871    
## season31     0.683202   2.121558   0.322    0.748    
## season32     0.020669   2.121615   0.010    0.992    
## season33     0.024802   2.121684   0.012    0.991    
## season34     0.028936   2.121765   0.014    0.989    
## season35     1.033070   2.121860   0.487    0.627    
## season36     0.037203   2.121966   0.018    0.986    
## season37     1.374671   2.122086   0.648    0.519    
## season38     0.712138   2.122218   0.336    0.738    
## season39     1.049605   2.122362   0.495    0.622    
## season40     0.387072   2.122519   0.182    0.856    
## season41     0.391205   2.122689   0.184    0.854    
## season42     0.395339   2.122871   0.186    0.853    
## season43     0.066139   2.123066   0.031    0.975    
## season44     0.070273   2.123273   0.033    0.974    
## season45     0.074407   2.123493   0.035    0.972    
## season46     0.745207   2.123725   0.351    0.726    
## season47     0.416008   2.123970   0.196    0.845    
## season48     0.086808   2.124227   0.041    0.967    
## season49     0.757608   2.124497   0.357    0.722    
## season50     0.761742   2.124780   0.359    0.721    
## season51     0.099209   2.125074   0.047    0.963    
## season52     0.103343   2.125382   0.049    0.961    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.778 on 104 degrees of freedom
## Multiple R-squared:  0.6371, Adjusted R-squared:  0.4556 
## F-statistic: 3.511 on 52 and 104 DF,  p-value: 2.59e-08
## Forecast sub-meter 2 twenty periods ahead from the linear model and plot
## the result
forecastfitSM2 <- forecast(fitSM2, h = 20)
plot(forecastfitSM2)

## Same forecast, this time requesting the 80 and 90 levels
forecastfitSM2c <- forecast(fitSM2, h = 20, level = c(80, 90))

## Plot it with the y axis limited to 0-20 and both axes labelled
plot(
  forecastfitSM2c,
  xlab = "Time",
  ylab = "Watt-Hours",
  ylim = c(0, 20)
)

Decomposing Sub-meter 2 into trend, seasonal and remainder in order to understand the visualization better

## Split the sub-meter 2 series into its trend, seasonal and random parts
components070809SM2weekly <- decompose(x = tsSM2_070809weekly)

## Plot the decomposition
plot(components070809SM2weekly)

## List the elements of the decomposition (length, class and mode of each)
summary(components070809SM2weekly)
##          Length Class  Mode     
## x        157    ts     numeric  
## seasonal 157    ts     numeric  
## trend    157    ts     numeric  
## random   157    ts     numeric  
## figure    52    -none- numeric  
## type       1    -none- character
## Seasonally adjust sub-meter 2: subtract the seasonal component of the
## decomposition from the original series, then plot the adjusted series
tsSM2_070809Adjusted <-
  tsSM2_070809weekly - components070809SM2weekly[["seasonal"]]
autoplot(tsSM2_070809Adjusted)

Use Holt Winters Exponential Smoothing & Plot

## Holt Winters Exponential Smoothing of the seasonally adjusted series
## (beta and gamma both set to FALSE) & plot
tsSM2_HW070809 <- HoltWinters(tsSM2_070809Adjusted, beta = FALSE, gamma = FALSE)
plot(tsSM2_HW070809, ylim = c(0, 25))

## HoltWinters forecast 25 periods ahead & plot
tsSM2_HW070809for <- forecast(tsSM2_HW070809, h = 25)
plot(tsSM2_HW070809for, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - Sub-meter 2")

## Forecast HoltWinters with diminished confidence levels (10 and 25)
tsSM2_HW070809forC <- forecast(tsSM2_HW070809, h = 25, level = c(10, 25))

## Plot only the forecasted area. `include` is the number of observed values
## drawn ahead of the forecast, so 1 leaves essentially just the forecast.
## It replaces an unnamed `start(2010)` argument, which did not set a start
## year: it evaluated to c(1, 1) and was matched positionally to `include`.
plot(tsSM2_HW070809forC, include = 1, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - Sub-meter 2")

HeaterAC

## Weekly time series of sub-meter 3: 52 observations per cycle, with the
## first observation placed at period 1 of 2007
tsSM3_070809weekly <- ts(
  house070809weekly$Sub_metering_3,
  start = c(2007, 1),
  frequency = 52
)

## Draw the series with autoplot: red line, axis labels and a title
autoplot(
  tsSM3_070809weekly,
  ts.colour = "red",
  xlab = "Time",
  ylab = "Watt Hours",
  main = "HeaterAC"
)

## Draw the same series with plot.ts
plot.ts(tsSM3_070809weekly)

Apply time series linear regression to the sub-meter 3 ts object and use summary to obtain R2 and RMSE from the model you built

## Regress the sub-meter 3 series on a trend term plus one seasonal term per
## period of the cycle, then print the model summary
fitSM3 <- tslm(formula = tsSM3_070809weekly ~ trend + season)
summary(fitSM3)
## 
## Call:
## tslm(formula = tsSM3_070809weekly ~ trend + season)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.7336  -2.4003  -0.3333   1.7336  20.0000 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  11.88381    3.23073   3.678 0.000374 ***
## trend        -0.03334    0.01153  -2.893 0.004654 ** 
## season2      -4.08348    4.74373  -0.861 0.391319    
## season3      -4.38348    4.74305  -0.924 0.357527    
## season4     -10.01680    4.74239  -2.112 0.037065 *  
## season5      -4.31680    4.74176  -0.910 0.364728    
## season6      -9.95013    4.74115  -2.099 0.038267 *  
## season7      -9.91679    4.74058  -2.092 0.038883 *  
## season8      -4.21678    4.74003  -0.890 0.375729    
## season9      -9.85011    4.73951  -2.078 0.040144 *  
## season10     -9.81677    4.73902  -2.071 0.040789 *  
## season11     -3.78343    4.73856  -0.798 0.426438    
## season12     -3.75009    4.73813  -0.791 0.430471    
## season13     -9.71675    4.73772  -2.051 0.042787 *  
## season14     -9.68341    4.73734  -2.044 0.043475 *  
## season15     -9.31674    4.73699  -1.967 0.051870 .  
## season16     -8.95007    4.73667  -1.890 0.061606 .  
## season17     -9.25006    4.73637  -1.953 0.053509 .  
## season18     -9.21672    4.73611  -1.946 0.054347 .  
## season19     -9.18338    4.73587  -1.939 0.055197 .  
## season20     -3.15004    4.73566  -0.665 0.507411    
## season21     -9.11670    4.73548  -1.925 0.056937 .  
## season22     -8.75003    4.73532  -1.848 0.067470 .  
## season23     -3.38336    4.73520  -0.715 0.476511    
## season24     -9.35002    4.73510  -1.975 0.050963 .  
## season25     -8.98335    4.73503  -1.897 0.060574 .  
## season26     -8.61667    4.73499  -1.820 0.071668 .  
## season27     -9.25000    4.73497  -1.954 0.053441 .  
## season28     -8.88333    4.73499  -1.876 0.063445 .  
## season29     -3.18332    4.73503  -0.672 0.502889    
## season30      0.85002    4.73510   0.180 0.857883    
## season31     -8.44998    4.73520  -1.785 0.077258 .  
## season32     -8.41664    4.73532  -1.777 0.078423 .  
## season33     -3.04996    4.73548  -0.644 0.520950    
## season34     -2.68329    4.73566  -0.567 0.572197    
## season35     -8.31662    4.73587  -1.756 0.082017 .  
## season36     -8.61661    4.73611  -1.819 0.071736 .  
## season37     -2.91661    4.73637  -0.616 0.539379    
## season38     -8.21660    4.73667  -1.735 0.085760 .  
## season39     -2.84993    4.73699  -0.602 0.548728    
## season40     -8.48326    4.73734  -1.791 0.076247 .  
## season41     -8.11658    4.73772  -1.713 0.089658 .  
## season42     -8.41658    4.73813  -1.776 0.078599 .  
## season43     -2.04990    4.73856  -0.433 0.666201    
## season44     -8.34990    4.73902  -1.762 0.081017 .  
## season45     -8.64989    4.73951  -1.825 0.070862 .  
## season46     -2.61655    4.74003  -0.552 0.582125    
## season47     -8.24988    4.74058  -1.740 0.084770 .  
## season48     -8.21654    4.74115  -1.733 0.086055 .  
## season49     -8.18320    4.74176  -1.726 0.087358 .  
## season50     -8.14986    4.74239  -1.719 0.088678 .  
## season51     -2.44986    4.74305  -0.517 0.606591    
## season52     -2.74985    4.74373  -0.580 0.563384    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.2 on 104 degrees of freedom
## Multiple R-squared:  0.308,  Adjusted R-squared:  -0.03797 
## F-statistic: 0.8903 on 52 and 104 DF,  p-value: 0.6745
## Forecast sub-meter 3 twenty periods ahead from the linear model and plot
## the result
forecastfitSM3 <- forecast(fitSM3, h = 20)
plot(forecastfitSM3)

## Same forecast, this time requesting the 80 and 90 levels
forecastfitSM3c <- forecast(fitSM3, h = 20, level = c(80, 90))

## Plot it with the y axis limited to 0-20 and both axes labelled
plot(
  forecastfitSM3c,
  xlab = "Time",
  ylab = "Watt-Hours",
  ylim = c(0, 20)
)

Decomposing sub-meter 3 into trend, seasonal and remainder in order to understand the visualization better

## Split the sub-meter 3 series into its trend, seasonal and random parts
components070809SM3weekly <- decompose(x = tsSM3_070809weekly)

## Plot the decomposition
plot(components070809SM3weekly)

## List the elements of the decomposition (length, class and mode of each)
summary(components070809SM3weekly)
##          Length Class  Mode     
## x        157    ts     numeric  
## seasonal 157    ts     numeric  
## trend    157    ts     numeric  
## random   157    ts     numeric  
## figure    52    -none- numeric  
## type       1    -none- character
## Seasonally adjust sub-meter 3: subtract the seasonal component of the
## decomposition from the original series, then plot the adjusted series
tsSM3_070809Adjusted <-
  tsSM3_070809weekly - components070809SM3weekly[["seasonal"]]
autoplot(tsSM3_070809Adjusted)

Use Holt Winters Exponential Smoothing & Plot

## Holt Winters Exponential Smoothing of the seasonally adjusted series
## (beta and gamma both set to FALSE) & plot
tsSM3_HW070809 <- HoltWinters(tsSM3_070809Adjusted, beta = FALSE, gamma = FALSE)
plot(tsSM3_HW070809, ylim = c(0, 25))

## HoltWinters forecast 25 periods ahead & plot
tsSM3_HW070809for <- forecast(tsSM3_HW070809, h = 25)
plot(tsSM3_HW070809for, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - sub-meter 3")

## Forecast HoltWinters with diminished confidence levels (10 and 25)
tsSM3_HW070809forC <- forecast(tsSM3_HW070809, h = 25, level = c(10, 25))

## Plot only the forecasted area. `include` is the number of observed values
## drawn ahead of the forecast, so 1 leaves essentially just the forecast.
## It replaces an unnamed `start(2010)` argument, which did not set a start
## year: it evaluated to c(1, 1) and was matched positionally to `include`.
plot(tsSM3_HW070809forC, include = 1, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - sub-meter 3")

Other Appliances

## Weekly time series of sub-meter 0: 52 observations per cycle, with the
## first observation placed at period 1 of 2007
tsSM0_070809weekly <- ts(
  house070809weekly$Sub_metering_0,
  start = c(2007, 1),
  frequency = 52
)

## Draw the series with autoplot: red line, axis labels and a title
autoplot(
  tsSM0_070809weekly,
  ts.colour = "red",
  xlab = "Time",
  ylab = "Watt Hours",
  main = "Other Appliances"
)

## Draw the same series with plot.ts
plot.ts(tsSM0_070809weekly)

Apply time series linear regression to the sub-meter 0 ts object and use summary to obtain R2 and RMSE from the model you built

## Regress the sub-meter 0 series on a trend term plus one seasonal term per
## period of the cycle, then print the model summary
fitSM0 <- tslm(formula = tsSM0_070809weekly ~ trend + season)
summary(fitSM0)
## 
## Call:
## tslm(formula = tsSM0_070809weekly ~ trend + season)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.178  -1.713  -0.198   1.476  27.142 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  20.407057   4.079816   5.002 2.32e-06 ***
## trend        -0.004098   0.014555  -0.282  0.77885    
## season2      -6.985778   5.990460  -1.166  0.24622    
## season3      -2.481680   5.989593  -0.414  0.67948    
## season4      -3.644248   5.988762  -0.609  0.54417    
## season5      -9.151261   5.987966  -1.528  0.12948    
## season6     -11.036052   5.987206  -1.843  0.06814 .  
## season7      -9.854176   5.986480  -1.646  0.10277    
## season8      -3.627857   5.985790  -0.606  0.54578    
## season9     -13.857091   5.985135  -2.315  0.02256 *  
## season10    -11.564106   5.984516  -1.932  0.05604 .  
## season11    -14.104452   5.983932  -2.357  0.02029 *  
## season12    -15.333688   5.983383  -2.563  0.01182 *  
## season13    -11.651812   5.982870  -1.948  0.05417 .  
## season14    -14.492159   5.982392  -2.422  0.01714 *  
## season15    -16.132506   5.981949  -2.697  0.00817 ** 
## season16    -14.383964   5.981542  -2.405  0.01795 *  
## season17    -15.890977   5.981170  -2.657  0.00913 ** 
## season18    -16.097990   5.980833  -2.692  0.00829 ** 
## season19    -16.282781   5.980532  -2.723  0.00760 ** 
## season20    -13.434239   5.980267  -2.246  0.02679 *  
## season21    -15.007919   5.980036  -2.510  0.01363 *  
## season22    -12.937155   5.979842  -2.163  0.03280 *  
## season23    -13.655279   5.979682  -2.284  0.02443 *  
## season24    -15.462292   5.979558  -2.586  0.01110 *  
## season25    -18.402639   5.979470  -3.078  0.00267 ** 
## season26    -18.554097   5.979416  -3.103  0.00247 ** 
## season27    -14.949999   5.979399  -2.500  0.01397 *  
## season28    -16.523679   5.979416  -2.763  0.00677 ** 
## season29    -14.175137   5.979470  -2.371  0.01960 *  
## season30    -15.793261   5.979558  -2.641  0.00953 ** 
## season31    -14.911386   5.979682  -2.494  0.01422 *  
## season32    -16.962843   5.979842  -2.837  0.00548 ** 
## season33    -16.669857   5.980036  -2.788  0.00631 ** 
## season34    -16.521315   5.980267  -2.763  0.00678 ** 
## season35    -18.050550   5.980532  -3.018  0.00320 ** 
## season36    -15.768675   5.980833  -2.637  0.00966 ** 
## season37    -17.953466   5.981170  -3.002  0.00336 ** 
## season38    -13.460479   5.981542  -2.250  0.02653 *  
## season39    -15.434159   5.981949  -2.580  0.01127 *  
## season40    -14.696728   5.982392  -2.457  0.01568 *  
## season41    -15.148186   5.982870  -2.532  0.01284 *  
## season42    -14.444088   5.983383  -2.414  0.01752 *  
## season43    -14.839990   5.983932  -2.480  0.01474 *  
## season44    -14.591448   5.984516  -2.438  0.01646 *  
## season45      2.212649   5.985135   0.370  0.71236    
## season46      6.227858   5.985790   1.040  0.30055    
## season47     -6.212488   5.986480  -1.038  0.30179    
## season48     -4.986168   5.987206  -0.833  0.40686    
## season49      1.962375   5.987966   0.328  0.74378    
## season50      2.588694   5.988762   0.432  0.66645    
## season51     15.381681   5.989593   2.568  0.01165 *  
## season52      7.063558   5.990460   1.179  0.24104    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.829 on 104 degrees of freedom
## Multiple R-squared:  0.585,  Adjusted R-squared:  0.3775 
## F-statistic:  2.82 on 52 and 104 DF,  p-value: 3.568e-06
## Forecast sub-meter 0 twenty periods ahead from the linear model and plot
## the result
forecastfitSM0 <- forecast(fitSM0, h = 20)
plot(forecastfitSM0)

## Same forecast, this time requesting the 80 and 90 levels
forecastfitSM0c <- forecast(fitSM0, h = 20, level = c(80, 90))

## Plot it with the y axis limited to 0-20 and both axes labelled
plot(
  forecastfitSM0c,
  xlab = "Time",
  ylab = "Watt-Hours",
  ylim = c(0, 20)
)

Decomposing sub-meter 0 into trend, seasonal and remainder in order to understand the visualization better

## Split the sub-meter 0 series into its trend, seasonal and random parts
components070809SM0weekly <- decompose(x = tsSM0_070809weekly)

## Plot the decomposition
plot(components070809SM0weekly)

## List the elements of the decomposition (length, class and mode of each)
summary(components070809SM0weekly)
##          Length Class  Mode     
## x        157    ts     numeric  
## seasonal 157    ts     numeric  
## trend    157    ts     numeric  
## random   157    ts     numeric  
## figure    52    -none- numeric  
## type       1    -none- character
## Seasonally adjust sub-meter 0: subtract the seasonal component of the
## decomposition from the original series, then plot the adjusted series
tsSM0_070809Adjusted <-
  tsSM0_070809weekly - components070809SM0weekly[["seasonal"]]
autoplot(tsSM0_070809Adjusted)

Use Holt Winters Exponential Smoothing & Plot

## Holt Winters Exponential Smoothing of the seasonally adjusted series
## (beta and gamma both set to FALSE) & plot
tsSM0_HW070809 <- HoltWinters(tsSM0_070809Adjusted, beta = FALSE, gamma = FALSE)
plot(tsSM0_HW070809, ylim = c(0, 25))

## HoltWinters forecast 25 periods ahead & plot
tsSM0_HW070809for <- forecast(tsSM0_HW070809, h = 25)
plot(tsSM0_HW070809for, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - sub-meter 0")

## Forecast HoltWinters with diminished confidence levels (10 and 25)
tsSM0_HW070809forC <- forecast(tsSM0_HW070809, h = 25, level = c(10, 25))

## Plot only the forecasted area. `include` is the number of observed values
## drawn ahead of the forecast, so 1 leaves essentially just the forecast.
## It replaces an unnamed `start(2010)` argument, which did not set a start
## year: it evaluated to c(1, 1) and was matched positionally to `include`.
plot(tsSM0_HW070809forC, include = 1, ylim = c(0, 20), ylab = "Watt-Hours",
     xlab = "Time - sub-meter 0")

The R-squared and p-value obtained from the time series linear models above

## Table of model fit statistics. Each column holds the Multiple R-squared and
## the F-test p-value printed by summary() for one tslm fit earlier in this
## report: fitSM1 (Kitchen), fitSM2 (Laundry), fitSM3 (HeatAC) and fitSM0
## (Others). Kitchen and Laundry previously shared one pair of values that
## matched neither summary, and the sub-meter 0 model was missing.
Model <- c("R-Squared", "p-value")
SM_Kitchen <- c(0.3388, 0.4491)
SM_Laundry <- c(0.6371, 2.59e-08)
SM_HeatAC <- c(0.308, 0.6745)
SM_Others <- c(0.585, 3.568e-06)

metrics <- data.frame(Model, SM_Kitchen, SM_Laundry, SM_HeatAC, SM_Others)

## digits = 4 rounds every value to four decimals, so the very small p-values
## display as 0 instead of switching their column to scientific notation
kable(metrics, digits = 4) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"),
                fixed_thead = TRUE)
Model SM_Kitchen SM_Laundry SM_HeatAC SM_Others
R-Squared 0.3388 0.6371 0.3080 0.585
p-value 0.4491 0.0000 0.6745 0.000