Load the csv file into R.
library(tidyr)
library(dplyr)
library(DT)
raw_data <- read.csv('https://raw.githubusercontent.com/suswong/DATA-607-Project-2/main/Gasoline_Retail_Prices_Weekly_Average_by_Region__Beginning_2007.csv')
raw_data$Date <- as.Date(raw_data$Date,
format = "%m/%d/%Y")
datatable(raw_data)
newtable <- raw_data
colnames(newtable) <- c("Date", "New.York.State", "Albany","Batavia","Binghamton","Buffalo", "Dutchess", "Elmira,","Glens.Falls","Ithaca", "Kingston","Nassau","New.York.City","Rochester", "Syracuse","Utica","Watertown","White.Plains")
Pivot the table so that each color is in a row.
colnames(raw_data)
## [1] "Date" "New.York.State.Average....gal."
## [3] "Albany.Average....gal." "Batavia.Average....gal."
## [5] "Binghamton.Average....gal." "Buffalo.Average....gal."
## [7] "Dutchess.Average....gal." "Elmira.Average....gal."
## [9] "Glens.Falls.Average....gal." "Ithaca.Average....gal."
## [11] "Kingston.Average....gal." "Nassau.Average....gal."
## [13] "New.York.City.Average....gal." "Rochester.Average....gal."
## [15] "Syracuse.Average....gal." "Utica.Average....gal."
## [17] "Watertown.Average....gal." "White.Plains.Average....gal."
library(ggplot2)
long_table <- newtable %>%
pivot_longer(cols = !c('Date'),names_to = "region", values_to = "average_price" )
datatable(long_table)
colnames(raw_data)
## [1] "Date" "New.York.State.Average....gal."
## [3] "Albany.Average....gal." "Batavia.Average....gal."
## [5] "Binghamton.Average....gal." "Buffalo.Average....gal."
## [7] "Dutchess.Average....gal." "Elmira.Average....gal."
## [9] "Glens.Falls.Average....gal." "Ithaca.Average....gal."
## [11] "Kingston.Average....gal." "Nassau.Average....gal."
## [13] "New.York.City.Average....gal." "Rochester.Average....gal."
## [15] "Syracuse.Average....gal." "Utica.Average....gal."
## [17] "Watertown.Average....gal." "White.Plains.Average....gal."
library(ggplot2)
long_table <- newtable %>%
pivot_longer(cols = !c('Date'), names_to = "region", values_to = "average_price" )
datatable(long_table)
Overall, we can see most region follows the same trend. Overall, there is a increase in average weekly prices from mid 2007 to around 2012 for all region. There is a decrease in average weekly prices from around 2012 to around 2017. The average weekly prices increase from 2017 to present.
library(ggplot2)
ggplot(long_table, aes(x = Date, y = average_price, colour = region)) +
geom_smooth() + scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 3792 rows containing non-finite values (stat_smooth).
The US crude oil annual prices from 1900 can be found here: https://www.eia.gov/dnav/pet/hist/LeafHandler.ashx?n=pet&s=f000000__3&f=a
The website provided a XLS file of the crude oil prices. The file was save as a csv file.
crude_oil_raw_data <- read.csv('https://raw.githubusercontent.com/suswong/DATA-607-Project-2/main/crude%20oil%20prices%20from%20eia.gov.csv')
Remove the first two rows and rename the columns.
colnames(crude_oil_raw_data) <- c("Year", "Price")
crude_oil <- crude_oil_raw_data
tidy_crude_oil <- crude_oil [-1,]
tidy_crude_oil <- tidy_crude_oil [-1,]
filtered_crude_oil <- tidy_crude_oil[as.numeric(tidy_crude_oil$Year) >2006,]
datatable(filtered_crude_oil)
# filtered_crude_oil <- tidy_crude_oil %>%
# filter(Year >2006)
# datatable(filtered_crude_oil)
The annual US crude oil prices have similar trends as the average weekly gasoline prices in NY.
ggplot(filtered_crude_oil, aes(x = Year, y = Price, group=1)) +
geom_line()+ ggtitle("Annual Crude Oil Price from 2007 to 2022") +
xlab("Year") +
ylab("US Crude Oil Price (Dollars/Barrel)")
ggplot(long_table, aes(x = Date, y = average_price, colour = region)) +
geom_line() + scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) + ggtitle("Average Weekly Gasoline Prices in Different NY Region from 2007 to Present") +
ylab("Average Weekly Gasoline Prices") +
xlab("Year")
## Warning: Removed 3792 row(s) containing missing values (geom_path).