End-User Energy Consumption Across European Countries

Fossil Fuels and Renewables - 2009/2018

Jonah Hughson S3584913

Last updated: 25 October, 2020

Introduction

Introduction Cont.

Problem Statement

Data

Preprocessing

#' Read one sheet of the energy workbook into long format.
#'
#' Both energy samples go through exactly the same steps, so the pipeline is
#' defined once here and called once per sheet.
#'
#' @param sheet Sheet number passed to `read_xlsx()`.
#' @param path Path to the workbook.
#' @return A data frame with columns `Country`, `Year` (factor with levels
#'   "2009" and "2018") and `TOE` (double, rounded to 2 decimal places).
read_energy_sheet <- function(sheet, path = "Energy EU.xlsx") {
  year_cols <- c("2009", "2018")

  raw <- read_xlsx(path, sheet = sheet, skip = 9)

  # Dropped unused rows and columns. The year columns are selected by name
  # instead of by position so a shifted sheet layout cannot silently pick up
  # the wrong columns.
  # NOTE(review): rows 1:4 and 45:47 are presumably aggregate/footnote rows -
  # confirm against the workbook.
  trimmed <- raw[-c(1:4, 45:47), c(names(raw)[1], year_cols)]
  colnames(trimmed)[1] <- "Country"

  # gather() stacks the 2009 rows above the 2018 rows; later steps may rely on
  # that row order, so it is kept as is.
  long <- trimmed %>%
    gather("2009", "2018", key = "Year", value = "TOE")

  # Entries that cannot be parsed as numbers become NA here.
  long$TOE <- round(as.double(long$TOE), digits = 2)
  long$Year <- factor(long$Year, levels = year_cols)
  long
}

Fossil_Fuel <- read_energy_sheet(sheet = 4)

Renewables <- read_energy_sheet(sheet = 10)

Descriptive Statistics and Visualisation

# Per-year descriptive statistics of TOE for the fossil fuel sample.
# NA values are ignored by every statistic; `n` counts all rows in the group
# (including rows where TOE is NA) and `Missing` counts the NA rows.
Fossil_Sum <- Fossil_Fuel %>%
  group_by(Year) %>%
  summarise(
    Min = min(TOE, na.rm = TRUE),
    Q1 = quantile(TOE, probs = 0.25, na.rm = TRUE),
    Median = median(TOE, na.rm = TRUE),
    Q3 = quantile(TOE, probs = 0.75, na.rm = TRUE),
    Max = max(TOE, na.rm = TRUE),
    Mean = mean(TOE, na.rm = TRUE),
    SD = sd(TOE, na.rm = TRUE),
    n = n(),
    Missing = sum(is.na(TOE))
  )
kable(Fossil_Sum)
Year Min Q1 Median Q3 Max Mean SD n Missing
2009 0 74.4725 207.265 579.6050 13217.32 1221.262 2863.897 40 2
2018 0 78.3525 256.955 513.6375 10708.13 999.342 2308.217 40 0

Descriptives for Renewable Sample

# Per-year descriptive statistics of TOE for the renewables sample.
# NA values are ignored by every statistic; `n` counts all rows in the group
# (including rows where TOE is NA) and `Missing` counts the NA rows.
Renewable_Sum <- Renewables %>%
  group_by(Year) %>%
  summarise(
    Min = min(TOE, na.rm = TRUE),
    Q1 = quantile(TOE, probs = 0.25, na.rm = TRUE),
    Median = median(TOE, na.rm = TRUE),
    Q3 = quantile(TOE, probs = 0.75, na.rm = TRUE),
    Max = max(TOE, na.rm = TRUE),
    Mean = mean(TOE, na.rm = TRUE),
    SD = sd(TOE, na.rm = TRUE),
    n = n(),
    Missing = sum(is.na(TOE))
  )
kable(Renewable_Sum)
Year Min Q1 Median Q3 Max Mean SD n Missing
2009 0.00 341.0800 1152.71 3531.293 13693.49 2429.326 3233.011 40 2
2018 27.88 463.7275 1541.10 3797.035 15782.28 2936.750 3811.011 40 0

Two NAs were identified in the 2009 data, for Bosnia and Georgia (in both energy samples), because the data were not available (as noted on the Eurostat website). These two countries were therefore excluded from hypothesis testing.

Box Plot of Difference Scores

# Side-by-side box plots of the difference scores for the two energy samples.
# fossil_D and Renewable_D are not created in this section; they are assumed
# to already hold the per-country differences.
# Coercing each object to a data frame makes its values reachable through a
# column that carries the object's own name.
fossil_D <- as.data.frame(fossil_D)
Renewable_D <- as.data.frame(Renewable_D)
boxplot(
  fossil_D$fossil_D,
  Renewable_D$Renewable_D,
  at = c(1, 2),
  names = c("Fossil Fuel", "Renewables"),
  ylab = "TOE Difference (2009-2018)"
)

Descriptives Interpretation.

Hypothesis Testing

\[H_0: \mu_\Delta = 0 \]

\[H_A: \mu_\Delta \ne 0\] - The paired t-test assumes that the differences in scores are normally distributed.

Assumption Testing, Fossil Fuel

# Normal Q-Q plot of the fossil fuel difference scores with row 35 removed.
fossil_D <- as.data.frame(fossil_D)
# NOTE(review): if fossil_D has a single column, dropping a row with `[-35, ]`
# returns a plain vector rather than a data frame; the later calls pass
# fossil_D directly, which is consistent with that.
fossil_D <- fossil_D[-35, ] # Removed Turkey
qqPlot(fossil_D, dist = "norm")

## [1] 21  3

Assumption Testing, Renewables

# Normal Q-Q plot of the renewables difference scores with rows 28 and 35
# removed.
Renewable_D <- Renewable_D[-c(28, 35), ] # Removed UK and Turkey
# The coercion is kept as a separate statement on purpose: when the subset
# above yields a plain vector, as.data.frame() names the new column after the
# expression it is given, and the next line needs that name to be Renewable_D.
Renewable_D <- as.data.frame(Renewable_D)
qqPlot(Renewable_D$Renewable_D, dist = "norm")

## [1] 27 10

Assumption Testing Cont

– The Q-Q plot of the difference scores for the Fossil Fuel sample showed 5 potential outliers. However, because the sample size was greater than 30 (n = 37), the sampling distribution of the mean difference can be treated as approximately normal under the central limit theorem, so these extreme cases were retained, although the results should be interpreted with caution.

Paired T-test, Fossil Fuel

# Minimum, quartiles, median, mean and maximum of the fossil fuel difference
# scores that are passed to the t-test below.
summary(fossil_D)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -1038.20   -61.44   -15.82   -93.22    29.35   152.70
# Two-sided one-sample t-test of the fossil fuel difference scores against a
# mean of 0. This matches a paired t-test on the two years, assuming fossil_D
# holds the per-country paired differences.
t.test(
  x = fossil_D,
  alternative = "two.sided",
  mu = 0
)
## 
##  One Sample t-test
## 
## data:  fossil_D
## t = -2.1978, df = 36, p-value = 0.03448
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -179.234386   -7.199669
## sample estimates:
## mean of x 
## -93.21703

Paired T-test, Renewables

# Minimum, quartiles, median, mean and maximum of the renewables difference
# scores that are passed to the t-test below.
summary(Renewable_D)
##   Renewable_D     
##  Min.   :-221.75  
##  1st Qu.:  20.29  
##  Median : 162.15  
##  Mean   : 611.20  
##  3rd Qu.: 812.28  
##  Max.   :2786.20
# Two-sided one-sample t-test of the renewables difference scores against a
# mean of 0. This matches a paired t-test on the two years, assuming
# Renewable_D holds the per-country paired differences.
# NOTE(review): Renewable_D appears to be a one-column data frame at this
# point (it was last assigned via as.data.frame()), so the whole data frame is
# handed to t.test() rather than a numeric vector.
t.test(
  x = Renewable_D,
  alternative = "two.sided",
  mu = 0
)
## 
##  One Sample t-test
## 
## data:  Renewable_D
## t = 4.3445, df = 35, p-value = 0.000114
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  325.5984 896.7944
## sample estimates:
## mean of x 
##  611.1964

Results

Fossil Fuels

Renewables

Discussion

Conclusion

References