INTRODUCTION

Data: Table C1. Energy Consumption Overview: Estimates by Energy Source and End-Use Sector, 2015

Units are in Trillion Btu

US Energy Information Administration

PREPARE

Load required lirbaries.

library(knitr)
library(stringr)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(DT)

LOAD DATA

Load raw data file into R

energy_all <- read.csv("~/Desktop/EIA-source-use.csv", header = FALSE, stringsAsFactors = FALSE, check.names = FALSE)
energy_all[1:10,]
##             V1           V2           V3          V4        V5
## 1                                Sources                      
## 2                           Fossil_Fuels                      
## 3        State Total_Energy         Coal Natural_Gas Petroleum
## 4      Alabama        1,910          494         696       523
## 5       Alaska          620           20         337       242
## 6      Arizona        1,440          386         366       507
## 7     Arkansas        1,056          227         297       315
## 8   California        7,676           31       2,382     3,362
## 9     Colorado        1,480          340         493       466
## 10 Connecticut          752            7         261       315
##                   V6                     V7               V8
## 1                                                           
## 2                                                           
## 3  Total_Fossil_Fuel Nuclear_Electric_Power Renewable_Energy
## 4              1,714                    439              271
## 5                598                      0               22
## 6              1,258                    340              147
## 7                839                    145              122
## 8              5,775                    194              856
## 9              1,298                      0              128
## 10               582                    182               41
##                                V9                     V10             V11
## 1                                                                        
## 2                                                         End-Use_Sectors
## 3  Interstate_Flow_of_Electricity Net_Electricity_Imports     Residential
## 4                            -514                       0             353
## 5                               0                       0              49
## 6                            -306                       0             384
## 7                             -50                       0             230
## 8                             806                      47            1357
## 9                              54                     (s)             343
## 10                            -55                       2             249
##           V12        V13            V14
## 1                                      
## 2                                      
## 3  Commercial Industrial Transportation
## 4         264        808            485
## 5          66        333            172
## 6         347        235            474
## 7         180        371            275
## 8        1465       1837           3017
## 9         290        439            408
## 10        199         76            228

This shows the header is across three rows

Read the file again without the top two header rows

energy_raw <- read.csv("~/Desktop/EIA-source-use.csv", header = TRUE, skip = 2, stringsAsFactors = FALSE, check.names = FALSE)
energy_raw[1:10,]
##            State Total_Energy Coal Natural_Gas Petroleum Total_Fossil_Fuel
## 1        Alabama        1,910  494         696       523             1,714
## 2         Alaska          620   20         337       242               598
## 3        Arizona        1,440  386         366       507             1,258
## 4       Arkansas        1,056  227         297       315               839
## 5     California        7,676   31       2,382     3,362             5,775
## 6       Colorado        1,480  340         493       466             1,298
## 7    Connecticut          752    7         261       315               582
## 8       Delaware          278    7         108       102               217
## 9  Dist. of Col.          179  (s)          34        21                55
## 10       Florida        4,242  467       1,366     1,622             3,455
##    Nuclear_Electric_Power Renewable_Energy Interstate_Flow_of_Electricity
## 1                     439              271                           -514
## 2                       0               22                              0
## 3                     340              147                           -306
## 4                     145              122                            -50
## 5                     194              856                            806
## 6                       0              128                             54
## 7                     182               41                            -55
## 8                       0                8                             53
## 9                       0                2                            122
## 10                    294              301                            192
##    Net_Electricity_Imports Residential Commercial Industrial
## 1                        0         353        264        808
## 2                        0          49         66        333
## 3                        0         384        347        235
## 4                        0         230        180        371
## 5                       47        1357       1465       1837
## 6                      (s)         343        290        439
## 7                        2         249        199         76
## 8                        0          66         57         89
## 9                        0          43        109          7
## 10                       0        1219       1018        487
##    Transportation
## 1             485
## 2             172
## 3             474
## 4             275
## 5            3017
## 6             408
## 7             228
## 8              66
## 9              21
## 10           1517

SUBSET DATA

Compare states by looking at the consumption of energy by the four types by removing the source columns

energy_use <- energy_raw[,-2:-10]
energy_use[1:10,]
##            State Residential Commercial Industrial Transportation
## 1        Alabama         353        264        808            485
## 2         Alaska          49         66        333            172
## 3        Arizona         384        347        235            474
## 4       Arkansas         230        180        371            275
## 5     California        1357       1465       1837           3017
## 6       Colorado         343        290        439            408
## 7    Connecticut         249        199         76            228
## 8       Delaware          66         57         89             66
## 9  Dist. of Col.          43        109          7             21
## 10       Florida        1219       1018        487           1517

TIDY DATA

Gather and tidy data around the different categories of usage

energy_use_tidy <- gather(energy_use, "Use_Type", "Total_By_Type", Residential:Transportation) %>% 
  arrange(State)
datatable(energy_use_tidy)

CALCULATE TOTAL

Calculate the total energy consumed for each state by adding every four rows

total_state <- colSums(matrix(energy_use_tidy$Total_By_Type, nrow=4))

Create a vector with the total value repeated four times to add to the working data set

total_state_4 <- rep(total_state, each = 4)
total_state_4 
##   [1]  1910  1910  1910  1910   620   620   620   620  1440  1440  1440
##  [12]  1440  1056  1056  1056  1056  7676  7676  7676  7676  1480  1480
##  [23]  1480  1480   752   752   752   752   278   278   278   278   180
##  [34]   180   180   180  4241  4241  4241  4241  2852  2852  2852  2852
##  [45]   282   282   282   282   524   524   524   524  3943  3943  3943
##  [56]  3943  2848  2848  2848  2848  1495  1495  1495  1495  1084  1084
##  [67]  1084  1084  1725  1725  1725  1725  4259  4259  4259  4259   406
##  [78]   406   406   406  1400  1400  1400  1400  1445  1445  1445  1445
##  [89]  2767  2767  2767  2767  1770  1770  1770  1770  1133  1133  1133
## [100]  1133  1828  1828  1828  1828   392   392   392   392   853   853
## [111]   853   853   649   649   649   649   306   306   306   306  2288
## [122]  2288  2288  2288   677   677   677   677  3725  3725  3725  3725
## [133]  2524  2524  2524  2524   608   608   608   608  3741  3741  3741
## [144]  3741  1631  1631  1631  1631   957   957   957   957  3882  3882
## [155]  3882  3882   203   203   203   203  1649  1649  1649  1649   384
## [166]   384   384   384  2168  2168  2168  2168 12897 12897 12897 12897
## [177]   791   791   791   791   133   133   133   133  2369  2369  2369
## [188]  2369  1989  1989  1989  1989   775   775   775   775  1776  1776
## [199]  1776  1776   524   524   524   524
energy_use_tidy$Total_All <- total_state_4 
datatable(energy_use_tidy)

CALCULATE PERCENTAGE

Calculate the percentage contribution for each use type against the total

energy_use_perc <- energy_use_tidy %>%
  mutate(Percent_Total = Total_By_Type / Total_All)

Round to shorten the Percentage of Total calculation to two digits

energy_use_perc[,"Percent_Total"]=format(round(energy_use_perc[,"Percent_Total"]*100,2),nsmall=2)
datatable(energy_use_perc)

ANALYZE DATA

Various analyses can be performed at this point, such as identifying the state that has the maximum percentage contribution from one of the four sectors

energy_use_perc$State[which.max(energy_use_perc[,"Percent_Total"])]
## [1] "Louisiana"
energy_use_perc$Use_Type[which.max(energy_use_perc[,"Percent_Total"])]
## [1] "Industrial"
paste(max(energy_use_perc[,"Percent_Total"]),"%",sep="")
## [1] "69.59%"

Similarily looking at which state has the minimum consumption percentage from any one sector

energy_use_perc$State[which.min(energy_use_perc[,"Percent_Total"])]
## [1] "Dist. of Col."
energy_use_perc$Use_Type[which.min(energy_use_perc[,"Percent_Total"])]
## [1] "Industrial"
paste(min(energy_use_perc[,"Percent_Total"]),"%",sep="")
## [1] " 3.89%"

Additionally, we can look at which state has the highest total consumption percentage from any one sector

energy_use_perc$State[which.max(energy_use_perc[,"Total_By_Type"])]
## [1] "Texas"
energy_use_perc$Use_Type[which.max(energy_use_perc[,"Total_By_Type"])]
## [1] "Industrial"
max(energy_use_perc[,"Total_By_Type"])
## [1] 6459
paste(energy_use_perc$Percent_Total[which.max(energy_use_perc[,"Total_By_Type"])],"%",sep="")
## [1] "50.08%"

Additionally, we can look at which state has the highest total consumption from any one sector

energy_use_perc$State[which.max(energy_use_perc[,"Total_By_Type"])]
## [1] "Texas"
energy_use_perc$Use_Type[which.max(energy_use_perc[,"Total_By_Type"])]
## [1] "Industrial"
max(energy_use_perc[,"Total_By_Type"])
## [1] 6459