Files and data stored at: https://github.com/rweberc/Data607_Project2
Net actual interchange (electric): The algebraic sum of all metered interchange over all interconnections between two physically Adjacent Balancing Authority Areas. NERC definition https://www.eia.gov/tools/glossary/?id=electricity
elecDf <- read.csv("https://raw.githubusercontent.com/rweberc/Data607_Project2/master/electricity.csv", stringsAsFactors = FALSE, colClasses = "character", skip = 4)stopifnot(all(elecDf$megawatthours[c(FALSE, TRUE, FALSE, FALSE)] == "Demand"))
stopifnot(all(elecDf$megawatthours[c(FALSE, FALSE, TRUE, FALSE)] == "Net generation"))
stopifnot(all(elecDf$megawatthours[c(FALSE, FALSE, FALSE, TRUE)] == "Total net actual interchange"))elecDf$region <- NA
elecDf$region[c(TRUE, FALSE, FALSE, FALSE)] <- elecDf$megawatthours[c(TRUE, FALSE, FALSE, FALSE)]
elecDf <- elecDf %>% fill(region)
table(elecDf$region, useNA = "ifany")##
## California (region)
## 4
## Carolinas (region)
## 4
## Central (region)
## 4
## Electric Reliability Council of Texas
## 4
## Florida (region)
## 4
## Mid-Atlantic (region)
## 4
## Midwest (region)
## 4
## New England ISO (region)
## 4
## New York Independent System Operator (region)
## 4
## Northwest (region)
## 4
## Southeast (region)
## 4
## Southwest (region)
## 4
## Tennessee Valley Authority (region)
## 4
## United States Lower 48 (region)
## 4
elecDf$region[c(TRUE, FALSE, FALSE, FALSE)] <- ""elecDf <- elecDf %>% rename(category = megawatthours)
elecDf <- elecDf %>% select(category, region, everything())
colnames(elecDf) <- str_replace(colnames(elecDf), "X", "")
colnames(elecDf) <- str_replace_all(colnames(elecDf), "\\.", "/")
colnames(elecDf)## [1] "category" "region" "01/01/2018" "01/02/2018" "01/03/2018"
## [6] "01/04/2018" "01/05/2018" "01/06/2018" "01/07/2018" "01/08/2018"
## [11] "01/09/2018" "01/10/2018" "01/11/2018" "01/12/2018" "01/13/2018"
## [16] "01/14/2018" "01/15/2018" "01/16/2018" "01/17/2018" "01/18/2018"
## [21] "01/19/2018" "01/20/2018" "01/21/2018" "01/22/2018" "01/23/2018"
## [26] "01/24/2018" "01/25/2018" "01/26/2018" "01/27/2018" "01/28/2018"
## [31] "01/29/2018" "01/30/2018" "01/31/2018"
elecDf_long <- elecDf %>% gather("evalDay", "megawatthours", 3:length(.))# data check
elecDf_long$megawatthours[elecDf_long$megawatthours == " Inc. (region)"] <- ""
stopifnot(elecDf_long$megawatthours[str_trim(elecDf_long$region) == ""] == "")
# filter out unneed "region" rows
elecDf_long <- elecDf_long %>% filter(str_trim(region) != "")
# parse day and megawatthours
elecDf_long$evalDay <- mdy(elecDf_long$evalDay)
elecDf_long$megawatthours <- as.numeric(elecDf_long$megawatthours)elecDf <- elecDf_long %>% spread(category, megawatthours)
rm(elecDf_long)
# data check: confirm no megawatt hours are NA
stopifnot(nrow(elecDf %>% filter_if(is.Date, any_vars(is.na(.)))) == 0)perRegionAvg <- elecDf %>%
group_by(region) %>%
summarize(demandAvg = mean(Demand),
netGenAvg = mean(`Net generation`),
totInterchgAvg = mean(`Total net actual interchange`))
ggplot(perRegionAvg, aes(x=region, y=totInterchgAvg)) +
geom_bar(stat = "identity") +
labs(title = "Avg Interchange values", x = "Region", y = "Averge Interchange") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0))California looks to have the greatest deficits, while the northwest region looks to have the greatest surplus.
dayDf <- elecDf %>%
ungroup() %>%
mutate(evalDayOfWeek = wday(evalDay, label = TRUE)) %>%
group_by(region, evalDayOfWeek) %>%
summarize(demandAvg = mean(Demand),
netGenAvg = mean(`Net generation`),
totInterchgAvg = mean(`Total net actual interchange`))
ggplot(dayDf, aes(x = region, y = totInterchgAvg)) +
geom_bar(aes(fill = evalDayOfWeek), position = "dodge", stat = "identity") +
labs(title = "Avg Interchange values", x = "Region", y = "Averge Interchange") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x = element_text(angle = -90, hjust = 0, vjust = 0))Net values do not seem dependent on days.