library(data.table)
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.4.4
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday,
##     week, yday, year
## The following object is masked from 'package:base':
## 
##     date
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
## Warning: package 'knitr' was built under R version 3.4.3
library(ggplot2)

# Smart meter data ----

# Read the smart meter extract, keeping only columns 3 to 57 of the CSV.
# Later steps aggregate these readings by read date and postcode sector.
smart_meter <- fread(
  "~/Desktop/PhD/Thesis/Book_thesis/99_New_Data/oldham_sm.csv",
  select = 3:57
)
# Earliest and latest read dates present, used to pick a 12-month window.
# Re-check this when working on the full dataset rather than the sample.
date_range <- range(smart_meter[["readdate"]])
date_range
## [1] "2015-01-01" "2015-12-31"
# Restrict the readings to one full 12-month window. The sample already spans
# exactly 2015-01-01 to 2015-12-31, so the filter is left disabled here;
# re-enable it when working on the full dataset.
# smart_meter_2015 <- smart_meter %>%
#   filter(between(readdate, as.Date("2015-01-01"), as.Date("2015-12-31")))

smart_meter_2015 <- smart_meter
rm(smart_meter)

# Mean usage per postcode sector for each read date. Missing readings are
# skipped (na.rm = TRUE) so that a single NA value does not turn the sector's
# whole annual total into NA; the EPC mean is computed the same way.
Date_totals <- smart_meter_2015 %>%
  group_by(address_postcodesec, readdate) %>%
  summarise(thatdate_average = mean(daily_usage, na.rm = TRUE)) %>%
  ungroup() # do not carry the postcode-sector grouping beyond this step

# Annual total per postcode sector: the sum of its daily averages.
# A date on which every reading is missing yields NaN, which is deliberately
# left to propagate so the affected sector is visible rather than understated.
# NOTE(review): a sector with fewer than 365 read dates is also understated;
# confirm date coverage per sector on the full dataset.
Annual_total <- Date_totals %>%
  group_by(address_postcodesec) %>%
  summarise(annual_usage_SM = sum(thatdate_average))

# EPC data ----

epc <- fread("~/Desktop/PhD/Thesis/Book_thesis/99_New_Data/epc_col_cleaned.csv")
# Keep only the most recent EPC record for each building.
# Grouping is by BUILDING_REFERENCE_NUMBER to preserve full addresses in cases
# such as "flat 2". The numbers cannot be guaranteed to be unique, but this is
# the best that can be done with the data until changes are made.
# Records that tie on the latest LODGEMENT_DATE are all kept.
epc_unique <- epc %>%
  group_by(BUILDING_REFERENCE_NUMBER) %>%
  filter(LODGEMENT_DATE == max(LODGEMENT_DATE)) %>%
  ungroup() # drop the per-building grouping so it does not leak downstream
## Warning: package 'bindrcpp' was built under R version 3.4.4
# Total kilowatt hours for each EPC record. ENERGY_CONSUMPTION_CURRENT is
# expressed in kWh/m2, so multiplying by the floor area gives a total that is
# comparable to the smart meter data.
epc_unique[["TOTAL_KWH"]] <-
  epc_unique[["ENERGY_CONSUMPTION_CURRENT"]] * epc_unique[["TOTAL_FLOOR_AREA"]]
# Keep only records lodged within the same 12-month range as the smart meter
# data so that the two sources are comparable.
epc_2015 <- epc_unique %>%
  filter(
    LODGEMENT_DATE >= as.Date("2015-01-01"),
    LODGEMENT_DATE <= as.Date("2015-12-31")
  )
## Warning in strptime(xx, f <- "%Y-%m-%d", tz = "GMT"): unknown timezone
## 'default/Europe/London'
# Mean total kWh per postcode sector (PCS). The EPC figures are already annual
# values, so they are averaged rather than summed; NA totals are ignored.
epc_pcs_annual <- epc_2015 %>%
  group_by(PCS) %>%
  summarise(annual_usage_epc = mean(TOTAL_KWH, na.rm = TRUE))

# The raw EPC table is no longer needed.
rm(epc)
# Compare EPC usage with smart meter usage per postcode sector.
# merge() keeps only the sectors that appear in both tables.
Annual_comparison <- merge(
  x = epc_pcs_annual,
  y = Annual_total,
  by.x = "PCS",
  by.y = "address_postcodesec"
)
# Percentage difference of the smart meter total relative to the EPC total.
# NOTE(review): the recorded results show smart meter totals roughly 100 times
# the EPC totals; confirm that daily_usage and TOTAL_KWH are in the same unit.
Annual_comparison[["percentage_change"]] <- with(
  Annual_comparison,
  (annual_usage_SM - annual_usage_epc) / annual_usage_epc * 100
)
head(Annual_comparison)
##    PCS annual_usage_epc annual_usage_SM percentage_change
## 1 M241         23439.67         2235650          9437.891
## 2 M242         24045.50         2171167          8929.412
## 3 M350         23540.84         2113715          8878.930
## 4 M359         25390.10         2202945          8576.395
## 5 M400         19223.00         2088778         10766.036
## 6 M403         29377.20         2194400          7369.739
#write.csv(Annual_comparison, "./99_New_Data/Annualconsumption.csv")

# Mapping (QGIS) ----

# Folder holding the map images (presumably exported from QGIS).
img1_path <- "../../05_The_EUC/"
# Map of annual smart meter usage.
file.path(img1_path, "Smart_meter_annual.jpeg") %>%
  knitr::include_graphics()

# Map of annual EPC usage.
file.path(img1_path, "EPC_annual.jpeg") %>%
  knitr::include_graphics()

# Map of the percentage difference between the two sources.
file.path(img1_path, "Percentage_diff_annual.jpeg") %>%
  knitr::include_graphics()

# Correlation ----

# Scatter plot of EPC usage (x) against smart meter usage (y) with a fitted
# linear regression line; the 95% confidence region is drawn by default.
ggplot(
  data = Annual_comparison,
  mapping = aes(x = annual_usage_epc, y = annual_usage_SM)
) +
  geom_point(shape = 0) + # shape 0 draws hollow squares
  geom_smooth(method = "lm")

# Linear model of EPC usage on smart meter usage.
# NOTE(review): the plotted fit is annual_usage_SM ~ annual_usage_epc, the
# reverse of this formula; confirm which direction is intended.
lm(formula = annual_usage_epc ~ annual_usage_SM, data = Annual_comparison)
## 
## Call:
## lm(formula = annual_usage_epc ~ annual_usage_SM, data = Annual_comparison)
## 
## Coefficients:
##     (Intercept)  annual_usage_SM  
##      -3.379e+03        1.446e-02