# Packages and Functions
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(message = FALSE)
knitr::opts_chunk$set(warnings = FALSE)
library(tidyverse)
library(readxl)
library(sf)
library(tmap)

source("functions.R")
# -------------------------------- All 1``````  Variables -------------------------------
# egrid1 = selecting required sheet from excel
# egrid2 = unit conversion and selection and renaming of required cols
# egrid3 = negated NAs and making new variables using mutate() + ifelse + is.na()
# coef   = grams of CO2 released per 1,000 btu of fuel burned or mmt of CO2 released per quadrillion btu of fuel burned
# egrid4 = left joined coef values 
# egrid4state = state level grouping
# egrid4statefuel = state level grouping per fuel 
# egrid5 = spatial dataframe using lat long and crs
# ------------------------------------------------------------------------------


# -------------------------------- Data wrangling ------------------------------

# read excel --- specify sheet within Excel file, number of rows to skip before reading data, 
# and assumes the first row has column names followed by rows of data
egrid1 = read_excel("egrid2021_data_metric.xlsx",sheet="PLNT21",skip=1)

# Unit conversions and selecting only rewuired columns
egrid2 = egrid1 %>% 
  tolow() %>% 
  filter(pstatabb == "GA" | pstatabb == "AL") %>% 
  mutate(heatinbtum = plhtian,
         netgengwh = plngenan/10^3,
         epaemimmt = round(plco2eqa/10^6,1)) %>% 
  select(plantname = pname,
         stateabbr = pstatabb,
         fuel = plfuelct,
         nameplatecapmw = namepcap,
         heatinbtum,
         netgengwh,
         epaemimmt,
         lat, lon)

# Negating NAs and making new variables using mutate() + ifelse + is.na()
egrid3 = egrid2 %>% 
  mutate(nameplatecapmw = ifelse(is.na(nameplatecapmw), 0, nameplatecapmw),
         heatinbtum  =    ifelse(is.na(heatinbtum), 0, heatinbtum),
         netgengwh =      ifelse(is.na(netgengwh), 0, netgengwh),
         epaemimmt =      ifelse(is.na(epaemimmt), 0, epaemimmt),
         capfactor = 100 * ((netgengwh*10^3) / 
                              (nameplatecapmw*365*24)))

# Reading new variable having CO2 coeffs
coef = read_excel("egrid-coef.xlsx")

# Introducing left_join() to join columns from a second table based on matching values of a shared column/columns
egrid4 = egrid3 %>% 
  left_join(coef) %>% 
  mutate(calcemimmt = heatinbtum*10^3 * coef / 10^12,
         calctoepa = 100*round(calcemimmt/epaemimmt,4))

# Aggregating individual plant level data to state level by using group_by() and summarize()
egrid4state = egrid4 %>% 
  group_by(stateabbr) %>% 
  summarize(netgengwh = sum(netgengwh,na.rm=T),
            nameplatecapmw = sum(nameplatecapmw,na.rm=T),
            epaemimmt = sum(epaemimmt,na.rm=T),
            calcemimmt = sum(calcemimmt,na.rm=T))

# and calculating statewide capacity factors for different fuels
egrid4statefuel = egrid4 %>% 
  group_by(stateabbr,fuel) %>% 
  summarize(netgengwh = sum(netgengwh,na.rm=T),
            nameplatecapmw = sum(nameplatecapmw,na.rm=T),
            epaemimmt = sum(epaemimmt,na.rm=T),
            calcemimmt = sum(calcemimmt,na.rm=T)) %>% 
  mutate(capfactor = 100 * ((netgengwh*10^3) / 
                              (nameplatecapmw*365*24)))


# -------------------------------- Data mapping --------------------------------

# using the lon and lat fields to build a spatial dataframe of points using crs
egrid5 = egrid4 %>% 
  filter(!(is.na(lat) | is.na(lon))) %>% 
  st_as_sf(coords = c("lon", "lat"), 
           crs = 4326)

# Creating an interactive map
tmap_mode("view")

# plotting spatial dataframe points on Open Street Map basemap
tm_basemap("OpenStreetMap") +
  tm_shape(egrid5) +
  tm_dots()
# coloring dots by fuel field and sizing dots by net generation in GWh
tm_basemap("OpenStreetMap") +
  tm_shape(egrid5) +
  tm_dots(col="fuelcolor",size="netgengwh")
# adding base imagery and set layer opacity (opposite of transparency) to 50%
tm_basemap(providers$Esri.WorldImagery) +
  tm_shape(egrid5) +
  tm_dots(col="fuelcolor",size="netgengwh",alpha=.50)
# nesting dplyr filter command within tm_shape() to limit plants to solar ones
tm_basemap("OpenStreetMap") +
  tm_shape(egrid5 %>% filter(fuel=="SOLAR")) +
  tm_dots(col="fuelcolor", size="nameplatecapmw")
# mapping dots with size based on emissions and saving to a tmap object named m to an interactive html file 8” high
m = tm_basemap("OpenStreetMap") +
  tm_shape(egrid5) +
  tm_dots(col="fuelcolor", size="calcemimmt")

tmap_save(m,"elecemissions.html",height=8)