Privileged and Confidential. Not to be copied or reproduced without the express written consent of the author.
Process the time series energy consumption data for a home over a one month duration sampled at 1 second granularity and extract the energy consumption time series for individual appliances listed.
# NOTE(review): hard-coded, machine-specific working directory; the relative
# path "data.txt" read later in this script is resolved against it.
setwd("C:/Users/jlee/Documents/Personal/Bidgely")
library("lubridate")
library("scales")
library("ggplot2")
library("reshape2")
library("dplyr")
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the raw two-column readings (no header row) and label the columns:
# a Unix timestamp and the whole-home power reading.
mydata <- setNames(
  read.csv("data.txt", header = FALSE),
  c("date_unix", "watts")
)

# Convert the Unix timestamps to POSIXct in the US/Pacific time zone.
mydata[["date_pos"]] <- as.POSIXct(
  mydata[["date_unix"]],
  origin = "1970-01-01",
  tz = "US/Pacific"
)

# Whole-home consumption over time, with a smoothed trend line drawn on top.
ggplot(data = mydata, aes(x = date_pos, y = watts)) +
  geom_line(color = "steelblue") +
  labs(
    title = "Total Energy Consumption of Selected Home\n(Over the Month of August)",
    x = "in Seconds",
    y = "Watts"
  ) +
  ylim(0, 15000) +
  stat_smooth(size = 1, color = "black")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
We extract energy consumption in the following order, starting with the appliances that have the longest pulse duration: Pool Pump, AC2 Unit, AC1 Unit, and finally the Refrigerator.
To extract the Pool Pump energy consumption, we look for continuous pulses of at least 1,500 watts lasting at least 10,800 seconds (3 hours).
# Pool Pump detection.
# A pump cycle is a run of consecutive samples at or above pool_min_watts that
# is at least pool_window samples long (3 hours at one sample per second).
# Each qualifying run is consumed in whole, back-to-back windows beginning at
# the first sample of the run; a trailing remainder shorter than one window is
# left unattributed. The start flag is set on the first sample of the run.
#
# Run-length encoding is used instead of scanning a window from every row:
#   * a qualifying run that begins on row 1 is handled (testing
#     mydata$pool[i-1] at i == 1 yields a zero-length value and `if` fails);
#   * the last possible window, the one ending on the final row, is considered;
#   * NA readings are treated as "not ON" instead of aborting the `if`;
#   * the cost is linear in the number of rows rather than rows x window.
pool_min_watts <- 1500
pool_window <- 60 * 60 * 3

pool_above <- !is.na(mydata$watts) & mydata$watts >= pool_min_watts
pool_runs <- rle(pool_above)
pool_run_first <- cumsum(pool_runs$lengths) - pool_runs$lengths + 1
# Whole windows that fit in each above-threshold run (0 for below-threshold runs).
pool_windows <- (pool_runs$lengths %/% pool_window) * pool_runs$values
pool_hit <- pool_windows > 0
pool_first <- pool_run_first[pool_hit]
pool_last <- pool_first + pool_windows[pool_hit] * pool_window - 1

pool_on <- numeric(nrow(mydata))
for (k in seq_along(pool_first)) {
  pool_on[pool_first[k]:pool_last[k]] <- 1
}
pool_started <- numeric(nrow(mydata))
pool_started[pool_first] <- 1

mydata$poolstart <- pool_started # Indicates when the pool pump starts
mydata$pool <- pool_on # Indicates whether the pool pump is ON
mydata$poolwatts <- pool_on * pool_min_watts # The estimated energy consumed by the pool pump when ON
Calculate the number of times the Pool Pump started, and plot the days and hours at which it started. Finally, calculate the percentage of total energy consumed by the Pool Pump.
# How many times did the Pool Pump start? Answer = 6 times.
sum(mydata$poolstart)
## [1] 6
# Plot of the number of starts by day of month.
# Breaks sit on the half-integers so each day gets its own bar centred on its
# tick. With integer breaks 1:31, hist() (right-closed bins, lowest value
# included) puts days 1 and 2 into the same first bin [1, 2].
poolstartdays <- day(mydata$date_pos[mydata$poolstart == 1])
hist(poolstartdays, breaks = seq(0.5, 31.5, by = 1),
     main = "Histogram of Pool Pump Starts by Day of Month",
     xlab = "Day of Month")
axis(side = 1, at = seq(1, 31, 1))
# Plot of the number of starts by hour of day.
# Same reasoning: breaks from -0.5 to 23.5 keep hour 0 and hour 1 in separate bars.
poolstarthour <- hour(mydata$date_pos[mydata$poolstart == 1])
hist(poolstarthour, breaks = seq(-0.5, 23.5, by = 1),
     main = "Histogram of Pool Pump Starts by Hour of Day",
     xlab = "Hour of Day")
axis(side = 1, at = seq(0, 23, 1))
# What % of total did the Pool Pump Consume? About 4.62%
# as.numeric() forces double arithmetic for the sums.
percent(sum(as.numeric(mydata$poolwatts)) / sum(as.numeric(mydata$watts)))
## [1] "4.62%"
# Plot the extracted energy consumption
ggplot(data = mydata, aes(x = date_pos, y = poolwatts)) +
  geom_line(color = "steelblue") +
  ggtitle("Pool Pump Energy Consumption") +
  xlab("in Seconds") +
  ylab("Watts") +
  ylim(0, 15000) +
  stat_smooth(size = 1, color = "black")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## Warning: Removed 7 rows containing missing values (geom_path).
To extract the AC2 Unit energy consumption, we look for continuous pulses of at least 4,000 watts lasting at least 1,800 seconds (30 minutes).
mydata$wattremaining <- mydata$watts - mydata$poolwatts # Calculate remaining watts after deducting Pool Pump

# AC2 Unit detection on the remaining watts.
# A cycle is a run of consecutive samples at or above ac2_min_watts that is at
# least ac2_window samples long (30 minutes at one sample per second). Each
# qualifying run is consumed in whole, back-to-back windows beginning at the
# first sample of the run; a trailing remainder shorter than one window is left
# unattributed. The start flag is set on the first sample of the run.
#
# Run-length encoding is used instead of scanning a window from every row:
#   * a qualifying run that begins on row 1 is handled (testing
#     mydata$ac2[i-1] at i == 1 yields a zero-length value and `if` fails);
#   * the last possible window, the one ending on the final row, is considered;
#   * NA readings are treated as "not ON" instead of aborting the `if`;
#   * the cost is linear in the number of rows rather than rows x window.
ac2_min_watts <- 4000
ac2_window <- 60 * 30

ac2_above <- !is.na(mydata$wattremaining) & mydata$wattremaining >= ac2_min_watts
ac2_runs <- rle(ac2_above)
ac2_run_first <- cumsum(ac2_runs$lengths) - ac2_runs$lengths + 1
# Whole windows that fit in each above-threshold run (0 for below-threshold runs).
ac2_windows <- (ac2_runs$lengths %/% ac2_window) * ac2_runs$values
ac2_hit <- ac2_windows > 0
ac2_first <- ac2_run_first[ac2_hit]
ac2_last <- ac2_first + ac2_windows[ac2_hit] * ac2_window - 1

ac2_on <- numeric(nrow(mydata))
for (k in seq_along(ac2_first)) {
  ac2_on[ac2_first[k]:ac2_last[k]] <- 1
}
ac2_started <- numeric(nrow(mydata))
ac2_started[ac2_first] <- 1

mydata$ac2start <- ac2_started # Indicates when the AC2 unit starts
mydata$ac2 <- ac2_on # Indicates whether the AC2 unit is ON
mydata$ac2watts <- ac2_on * ac2_min_watts # The estimated energy consumed by the AC2 unit when ON
Calculate the number of times the AC2 Unit started, and plot the days and hours at which it started. Finally, calculate the percentage of total energy consumed by the AC2 Unit.
# How many times did the AC2 Unit start? Answer = 170 times.
sum(mydata$ac2start)
## [1] 170
# Plot of the number of starts by day of month.
# Breaks sit on the half-integers so each day gets its own bar centred on its
# tick. With integer breaks 1:31, hist() (right-closed bins, lowest value
# included) puts days 1 and 2 into the same first bin [1, 2].
ac2startdays <- day(mydata$date_pos[mydata$ac2start == 1])
hist(ac2startdays, breaks = seq(0.5, 31.5, by = 1),
     main = "Histogram of AC2 Unit Starts by Day of Month",
     xlab = "Day of Month")
axis(side = 1, at = seq(1, 31, 1))
# Plot of the number of starts by hour of day.
# Same reasoning: breaks from -0.5 to 23.5 keep hour 0 and hour 1 in separate bars.
ac2starthour <- hour(mydata$date_pos[mydata$ac2start == 1])
hist(ac2starthour, breaks = seq(-0.5, 23.5, by = 1),
     main = "Histogram of AC2 Unit Starts by Hour of Day",
     xlab = "Hour of Day")
axis(side = 1, at = seq(0, 23, 1))
# What % of total did the AC2 Unit consume? About 22.8%
# as.numeric() forces double arithmetic for the sums.
percent(sum(as.numeric(mydata$ac2watts)) / sum(as.numeric(mydata$watts)))
## [1] "22.8%"
# Plot the extracted energy consumption
ggplot(data = mydata, aes(x = date_pos, y = ac2watts)) +
  geom_line(color = "steelblue") +
  ggtitle("AC2 Unit Energy Consumption") +
  xlab("in Seconds") +
  ylab("Watts") +
  ylim(0, 15000) +
  stat_smooth(size = 1, color = "black")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
To extract the AC1 Unit energy consumption, we look for continuous pulses of at least 2,500 watts lasting at least 600 seconds (10 minutes).
mydata$wattremaining <- mydata$wattremaining - mydata$ac2watts # Calculate remaining watts after deducting AC2 Unit

# AC1 Unit detection on the remaining watts.
# A cycle is a run of consecutive samples at or above ac1_min_watts that is at
# least ac1_window samples long (10 minutes at one sample per second). Each
# qualifying run is consumed in whole, back-to-back windows beginning at the
# first sample of the run; a trailing remainder shorter than one window is left
# unattributed. The start flag is set on the first sample of the run.
#
# Run-length encoding is used instead of scanning a window from every row:
#   * a qualifying run that begins on row 1 is handled (testing
#     mydata$ac1[i-1] at i == 1 yields a zero-length value and `if` fails);
#   * the last possible window, the one ending on the final row, is considered;
#   * NA readings are treated as "not ON" instead of aborting the `if`;
#   * the cost is linear in the number of rows rather than rows x window.
ac1_min_watts <- 2500
ac1_window <- 60 * 10

ac1_above <- !is.na(mydata$wattremaining) & mydata$wattremaining >= ac1_min_watts
ac1_runs <- rle(ac1_above)
ac1_run_first <- cumsum(ac1_runs$lengths) - ac1_runs$lengths + 1
# Whole windows that fit in each above-threshold run (0 for below-threshold runs).
ac1_windows <- (ac1_runs$lengths %/% ac1_window) * ac1_runs$values
ac1_hit <- ac1_windows > 0
ac1_first <- ac1_run_first[ac1_hit]
ac1_last <- ac1_first + ac1_windows[ac1_hit] * ac1_window - 1

ac1_on <- numeric(nrow(mydata))
for (k in seq_along(ac1_first)) {
  ac1_on[ac1_first[k]:ac1_last[k]] <- 1
}
ac1_started <- numeric(nrow(mydata))
ac1_started[ac1_first] <- 1

mydata$ac1start <- ac1_started # Indicates when the AC1 unit starts
mydata$ac1 <- ac1_on # Indicates whether the AC1 unit is ON
mydata$ac1watts <- ac1_on * ac1_min_watts # The estimated energy consumed by the AC1 unit when ON
Calculate the number of times the AC1 Unit started, and plot the days and hours at which it started. Finally, calculate the percentage of total energy consumed by the AC1 Unit.
# How many times did the AC1 Unit start? Answer = 762 times.
sum(mydata$ac1start)
## [1] 762
# Plot of the number of starts by day of month.
# Breaks sit on the half-integers so each day gets its own bar centred on its
# tick. With integer breaks 1:31, hist() (right-closed bins, lowest value
# included) puts days 1 and 2 into the same first bin [1, 2].
ac1startdays <- day(mydata$date_pos[mydata$ac1start == 1])
hist(ac1startdays, breaks = seq(0.5, 31.5, by = 1),
     main = "Histogram of AC1 Unit Starts by Day of Month",
     xlab = "Day of Month")
axis(side = 1, at = seq(1, 31, 1))
# Plot of the number of starts by hour of day.
# Same reasoning: breaks from -0.5 to 23.5 keep hour 0 and hour 1 in separate bars.
ac1starthours <- hour(mydata$date_pos[mydata$ac1start == 1])
hist(ac1starthours, breaks = seq(-0.5, 23.5, by = 1),
     main = "Histogram of AC1 Unit Starts by Hour of Day",
     xlab = "Hour of Day")
axis(side = 1, at = seq(0, 23, 1))
# What % of total did the AC1 Unit consume? About 19.6%
# as.numeric() forces double arithmetic for the sums.
percent(sum(as.numeric(mydata$ac1watts)) / sum(as.numeric(mydata$watts)))
## [1] "19.6%"
# Plot the extracted energy consumption
ggplot(data = mydata, aes(x = date_pos, y = ac1watts)) +
  geom_line(color = "steelblue") +
  ggtitle("AC1 Unit Energy Consumption") +
  xlab("in Seconds") +
  ylab("Watts") +
  ylim(0, 15000) +
  stat_smooth(size = 1, color = "black")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
We assume that the refrigerator runs continuously, drawing 200 watts or less. We therefore attribute up to 200 watts of the remaining consumption in each second to the refrigerator.
mydata$wattremaining <- mydata$wattremaining - mydata$ac1watts # Calculate remaining watts after deducting AC1 Unit
# Attribute up to 200 W of whatever remains in each second to the refrigerator.
# pmin() recycles the scalar cap across the column, so the result always has
# one value per row; a hard-coded rep(200, 2676545) only fits a data file with
# exactly that many rows.
mydata$refwatts <- pmin(mydata$wattremaining, 200)
# Plot the extracted energy consumption
ggplot(data = mydata, aes(x = date_pos, y = refwatts)) +
  geom_line(color = "steelblue") +
  ggtitle("Refrigerator Energy Consumption") +
  xlab("in Seconds") +
  ylab("Watts") +
  ylim(0, 15000) +
  stat_smooth(size = 1, color = "black")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
# What % of total does the Refrigerator consume? About 6.53%
percent(sum(as.numeric(mydata$refwatts)) / sum(as.numeric(mydata$watts)))
## [1] "6.53%"
Let's create an area plot of the extracted energy consumption of each appliance, along with the residual.
# Remove the refrigerator's share; what is left is the unattributed residual.
mydata[["wattremaining"]] <- mydata[["wattremaining"]] - mydata[["refwatts"]]

# Residual consumption over time, with a smoothed trend line drawn on top.
ggplot(data = mydata, aes(x = date_pos, y = wattremaining)) +
  geom_line(color = "steelblue") +
  labs(
    title = "Residual Energy Consumption",
    x = "in Seconds",
    y = "Watts"
  ) +
  ylim(0, 15000) +
  stat_smooth(size = 1, color = "black")
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.