# ==============================================================================
# 1. Descriptive Statistics: Dependent Variable by Key Independent Variable
# ==============================================================================
library(dplyr)
# Summarise annual electricity consumption (Econs2012) within each Energy
# Efficiency Band (EE_BAND). The result holds one row per band with the mean
# and the median of Econs2012; missing values are dropped from both
# statistics via na.rm = TRUE.
dv_by_iv_summary <- summarise(
  group_by(need_clean, EE_BAND),
  mean_econs = mean(Econs2012, na.rm = TRUE),
  median_econs = median(Econs2012, na.rm = TRUE)
)
# Display the initial answer to the Research Question.
# The summary is printed explicitly: a bare object name is only auto-printed
# when top-level results are being echoed, so under source() with the default
# echo = FALSE the table would be silently skipped while the heading printed
# by print() still appeared.
print("Average and median electricity consumption by EPC Band:")
## [1] "Average and median electricity consumption by EPC Band:"
print(dv_by_iv_summary)
## # A tibble: 6 × 3
## EE_BAND mean_econs median_econs
## <ord> <dbl> <dbl>
## 1 A/B 2375. 1900
## 2 C 3055. 2600
## 3 D 3511. 3050
## 4 E 3859. 3300
## 5 F 4382. 3650
## 6 G 4242. 3350
# ==============================================================================
# 2. Descriptive Statistics: Control Variables by Key Independent Variable
# ==============================================================================
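# We use cross-tabulations to examine the rival explanations.
# prop.table() with margin = 2 converts the counts into column proportions,
# i.e. the share of each category within an EPC band. The proportions are
# rounded to 4 decimal places and then multiplied by 100, so the output reads
# as percentages with 2 decimal places.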
# --- Cross-tabulation 1: Property Type by EE_BAND ---
# Rows are PROP_TYPE categories, columns are EE_BAND levels. Counts are turned
# into within-column proportions (margin = 2), rounded to 4 decimal places,
# and then scaled by 100 so each cell is a percentage of its band.
type_by_band <- round(
  prop.table(
    table(need_clean$PROP_TYPE, need_clean$EE_BAND),
    margin = 2
  ),
  digits = 4
) * 100
# Announce and show the property-type table. print() is called explicitly on
# the table because a bare object name is only auto-printed when top-level
# results are being echoed; under source() with the default echo = FALSE the
# table would be skipped while the heading still appeared.
print("Percentage of Property Types within each EPC Band:")
## [1] "Percentage of Property Types within each EPC Band:"
print(type_by_band)
##
## A/B C D E F G
## Detached 3.41 9.35 15.36 15.28 26.03 18.78
## Semi-detached 6.44 18.77 26.45 31.39 28.65 26.85
## End terrace 3.78 8.94 9.56 13.34 14.74 14.95
## Mid terrace 10.59 22.28 23.96 21.74 15.91 29.04
## Bungalow 2.91 6.36 10.78 9.60 8.84 5.27
## Flat (inc. maisonette) 72.86 34.30 13.90 8.65 5.82 5.10
# --- Cross-tabulation 2: Property Age by EE_BAND ---
# Rows are PROP_AGE categories, columns are EE_BAND levels. Counts are turned
# into within-column proportions (margin = 2), rounded to 4 decimal places,
# and then scaled by 100 so each cell is a percentage of its band.
age_by_band <- round(
  prop.table(
    table(need_clean$PROP_AGE, need_clean$EE_BAND),
    margin = 2
  ),
  digits = 4
) * 100
# Announce and show the property-age table. print() is called explicitly on
# the table because a bare object name is only auto-printed when top-level
# results are being echoed; under source() with the default echo = FALSE the
# table would be skipped while the heading still appeared.
print("Percentage of Property Age categories within each EPC Band:")
## [1] "Percentage of Property Age categories within each EPC Band:"
print(age_by_band)
##
## A/B C D E F G
## Before 1930 1.99 8.95 25.76 45.64 60.60 62.04
## 1930–1949 2.15 9.32 17.38 22.90 21.29 22.45
## 1950–1966 10.44 19.52 19.64 16.18 11.84 11.48
## 1967–1982 19.87 24.38 20.25 12.68 5.85 3.44
## 1983–1995 12.04 15.23 12.23 2.42 0.32 0.46
## 1996 onwards 53.51 22.61 4.74 0.18 0.10 0.13
# --- Cross-tabulation 3: Wall Construction by EE_BAND ---
# Rows are WALL_CONS categories, columns are EE_BAND levels. Counts are turned
# into within-column proportions (margin = 2), rounded to 4 decimal places,
# and then scaled by 100 so each cell is a percentage of its band.
wall_by_band <- round(
  prop.table(
    table(need_clean$WALL_CONS, need_clean$EE_BAND),
    margin = 2
  ),
  digits = 4
) * 100
# Announce and show the wall-construction table. print() is called explicitly
# on the table because a bare object name is only auto-printed when top-level
# results are being echoed; under source() with the default echo = FALSE the
# table would be skipped while the heading still appeared.
print("Percentage of Wall Construction types within each EPC Band:")
## [1] "Percentage of Wall Construction types within each EPC Band:"
print(wall_by_band)
##
## A/B C D E F G
## Cavity wall 77.67 81.32 66.65 43.96 30.45 29.85
## Other 22.33 18.68 33.35 56.04 69.55 70.15
# Note: You can replicate the same syntax for other control variables
# such as REGION or FLOOR_AREA_BAND if needed.