library(data.table)
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.4
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday,
## week, yday, year
## The following object is masked from 'package:base':
##
## date
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
## Warning: package 'knitr' was built under R version 3.4.3
library(ggplot2)
Smart meter data
# Read the smart meter extract, keeping only columns 3 to 57 of the CSV.
smart_meter <- fread(
  "~/Desktop/PhD/Thesis/Book_thesis/99_New_Data/oldham_sm.csv",
  select = 3:57
)

# Inspect the span of read dates so that a 12 month window can be chosen before
# the data is grouped by date and postcode sector.
# Re-check this range when working on the full dataset.
date_range <- range(smart_meter[["readdate"]])
date_range
## [1] "2015-01-01" "2015-12-31"

# The sample above already covers 2015-01-01 to 2015-12-31, so the date filter
# below stays disabled and every row is carried forward as smart_meter_2015.
#smart_meter %>%
# filter(between(readdate, as.Date("2015-01-01"), as.Date("2015-12-31"))) -> smart_meter_2015
smart_meter_2015 <- smart_meter

# Drop the original name so only smart_meter_2015 is used from here on.
rm(smart_meter)
# Annual smart meter consumption per postcode sector, built in two steps.

# Step 1: average daily_usage across all rows that share a postcode sector and
# a read date, giving one value per sector per date.
Date_totals <- summarise(
  group_by(smart_meter_2015, address_postcodesec, readdate),
  thatdate_average = mean(daily_usage)
)

# Step 2: add the per-date averages together within each postcode sector to
# obtain an annual total (365 values when every day of the year is present).
# NOTE(review): a sector with missing read dates would sum fewer than 365
# values and be understated - confirm date coverage per sector.
Annual_total <- summarise(
  group_by(Date_totals, address_postcodesec),
  annual_usage_SM = sum(thatdate_average)
)
EPC data
# Load the cleaned EPC records.
epc <- fread("~/Desktop/PhD/Thesis/Book_thesis/99_New_Data/epc_col_cleaned.csv")

# Only keep the most up to date EPC record for each building.
# Grouping on BUILDING_REFERENCE_NUMBER preserves full addresses in cases such
# as "flat 2". Not brilliant, because the numbers cannot be guaranteed to be
# unique, but it is the best that can be done until the data changes.
# The grouping is removed again with ungroup() so that it does not leak into
# epc_unique / epc_2015 and make every later step run once per building.
# NOTE(review): records tied on the latest LODGEMENT_DATE are all retained.
epc_unique <- epc %>%
  group_by(BUILDING_REFERENCE_NUMBER) %>%
  filter(LODGEMENT_DATE == max(LODGEMENT_DATE)) %>%
  ungroup()
## Warning: package 'bindrcpp' was built under R version 3.4.4

# Calculate total kilowatt hours for each EPC: ENERGY_CONSUMPTION_CURRENT is a
# calculated kWh/m2 figure, so multiplying by the floor area gives a total that
# is comparable to the smart meter data.
epc_unique$TOTAL_KWH <- epc_unique$ENERGY_CONSUMPTION_CURRENT * epc_unique$TOTAL_FLOOR_AREA

# Only take records lodged within the same 12 month range as the smart meter
# data, to make the two sources comparable.
# NOTE(review): because the latest-record filter runs first, a building whose
# newest certificate was lodged after 2015 is dropped here even if it also has
# a 2015 record - confirm that this is intended.
epc_2015 <- epc_unique %>%
  filter(LODGEMENT_DATE >= as.Date("2015-01-01") & LODGEMENT_DATE <= as.Date("2015-12-31"))
## Warning in strptime(xx, f <- "%Y-%m-%d", tz = "GMT"): unknown timezone
## 'default/Europe/London'

# Group by postcode sector and take the mean, as the EPC figures are already
# annual values. Missing TOTAL_KWH values are ignored.
epc_pcs_annual <- epc_2015 %>%
  group_by(PCS) %>%
  summarise(annual_usage_epc = mean(TOTAL_KWH, na.rm = TRUE))

# The raw EPC table is no longer needed.
rm(epc)
# Compare EPC usage to smart meter usage.
# Join the two per-sector tables on postcode sector; with merge()'s defaults
# only sectors present in both tables are kept.
Annual_comparison <- merge(
  x = epc_pcs_annual,
  y = Annual_total,
  by.x = "PCS",
  by.y = "address_postcodesec"
)

# Difference between the smart meter and EPC figures, expressed as a
# percentage of the EPC figure.
Annual_comparison$percentage_change <- with(
  Annual_comparison,
  (annual_usage_SM - annual_usage_epc) / annual_usage_epc * 100
)

# NOTE(review): in the rows printed below annual_usage_SM is roughly 100 times
# annual_usage_epc (percentage_change in the thousands). Check that daily_usage
# and TOTAL_KWH are expressed in the same units before interpreting this.
head(Annual_comparison)
## PCS annual_usage_epc annual_usage_SM percentage_change
## 1 M241 23439.67 2235650 9437.891
## 2 M242 24045.50 2171167 8929.412
## 3 M350 23540.84 2113715 8878.930
## 4 M359 25390.10 2202945 8576.395
## 5 M400 19223.00 2088778 10766.036
## 6 M403 29377.20 2194400 7369.739
#write.csv(Annual_comparison, "./99_New_Data/Annualconsumption.csv")
Mapping (QGIS)
# Folder holding the pre-rendered map images shown in this section.
img1_path <- "../../05_The_EUC/"

# Each image is embedded with its own top-level call so that knitr emits them
# one after another in the rendered document.
knitr::include_graphics(
  file.path(img1_path, "Smart_meter_annual.jpeg")
)

knitr::include_graphics(
  file.path(img1_path, "EPC_annual.jpeg")
)

knitr::include_graphics(
  file.path(img1_path, "Percentage_diff_annual.jpeg")
)

Correlation
# Scatter plot of the two annual figures, one point per row of
# Annual_comparison: EPC usage on the x axis, smart meter usage on the y axis.
ggplot(
  data = Annual_comparison,
  mapping = aes(x = annual_usage_epc, y = annual_usage_SM)
) +
  geom_point(shape = 0) + # shape 0 draws hollow squares
  # Add a linear regression line; by default geom_smooth() also draws the
  # 95% confidence region around it.
  geom_smooth(method = lm)

# Fit and print the linear model relating the two annual figures.
# NOTE(review): this regresses EPC usage on smart meter usage, whereas the
# smoother drawn above fits smart meter usage on EPC usage - confirm which
# direction is intended.
lm(formula = annual_usage_epc ~ annual_usage_SM, data = Annual_comparison)
##
## Call:
## lm(formula = annual_usage_epc ~ annual_usage_SM, data = Annual_comparison)
##
## Coefficients:
## (Intercept) annual_usage_SM
## -3.379e+03 1.446e-02