This dataset is from Susanna Wong’s week 5 discussion. The dataset shows weekly average gasoline prices for 17 regions in New York from 2007 to present. For analysis: “Create a line graph to view the trend or compare the prices of the gasoline for each region. We can view if the prices increases or decreases overall”.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.0 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

#Importing the CSV and converting the Date column from text to R's Date class

# Download the weekly price table, then parse the Date column (text in
# month/day/year order) into Date values so it can be used as a time axis.
df <- read.csv(
  file = "https://raw.githubusercontent.com/LeJQC/MSDS/main/DATA%20607/Project%202/Gasoline_Retail_Prices_Weekly_Average_by_Region__Beginning_2007.csv"
)

df[["Date"]] <- as.Date(df[["Date"]], format = "%m/%d/%Y")
head(df)
##         Date New.York.State.Average....gal. Albany.Average....gal.
## 1 2023-02-27                           3.47                   3.51
## 2 2023-02-20                           3.51                   3.54
## 3 2023-02-13                           3.54                   3.85
## 4 2023-02-06                           3.57                   3.61
## 5 2023-01-30                           3.56                   3.59
## 6 2023-01-23                           3.48                   3.48
##   Batavia.Average....gal. Binghamton.Average....gal. Buffalo.Average....gal.
## 1                    3.38                       3.41                    3.39
## 2                    3.38                       3.45                    3.41
## 3                    3.72                       3.77                    3.75
## 4                    3.44                       3.52                    3.45
## 5                    3.43                       3.50                    3.45
## 6                    3.37                       3.45                    3.41
##   Dutchess.Average....gal. Elmira.Average....gal. Glens.Falls.Average....gal.
## 1                     3.56                   3.40                        3.62
## 2                     3.59                   3.43                        3.63
## 3                     3.90                   3.77                        3.88
## 4                     3.63                   3.51                        3.69
## 5                     3.62                   3.47                        3.67
## 6                     3.54                   3.37                        3.58
##   Ithaca.Average....gal. Kingston.Average....gal. Nassau.Average....gal.
## 1                   3.47                     3.38                   3.36
## 2                   3.50                     3.40                   3.42
## 3                   3.82                     3.74                   3.73
## 4                   3.53                     3.49                   3.58
## 5                   3.51                     3.47                   3.49
## 6                   3.49                     3.38                   3.37
##   New.York.City.Average....gal. Rochester.Average....gal.
## 1                          3.48                      3.45
## 2                          3.54                      3.47
## 3                          3.82                      3.78
## 4                          3.64                      3.52
## 5                          3.62                      3.51
## 6                          3.53                      3.46
##   Syracuse.Average....gal. Utica.Average....gal. Watertown.Average....gal.
## 1                     3.45                  3.50                      3.48
## 2                     3.47                  3.53                      3.50
## 3                     3.81                  3.87                      3.86
## 4                     3.52                  3.62                      3.56
## 5                     3.50                  3.62                      3.54
## 6                     3.44                  3.59                      3.47
##   White.Plains.Average....gal.
## 1                         3.56
## 2                         3.61
## 3                         3.94
## 4                         3.66
## 5                         3.66
## 6                         3.58

#Converting data to a long format

# Reshape from one column per region to one row per (Date, Region) pair:
# every column except Date is stacked, with the old column name stored in
# `Region` and its value in `Average Price per gallon`.
df_long <- pivot_longer(
  data = df,
  cols = !Date,
  names_to = "Region",
  values_to = "Average Price per gallon"
)
head(df_long)
## # A tibble: 6 × 3
##   Date       Region                         `Average Price per gallon`
##   <date>     <chr>                                               <dbl>
## 1 2023-02-27 New.York.State.Average....gal.                       3.47
## 2 2023-02-27 Albany.Average....gal.                               3.51
## 3 2023-02-27 Batavia.Average....gal.                              3.38
## 4 2023-02-27 Binghamton.Average....gal.                           3.41
## 5 2023-02-27 Buffalo.Average....gal.                              3.39
## 6 2023-02-27 Dutchess.Average....gal.                             3.56

#Cleaning up the data

# Remove the ".Average....gal." suffix from each region name, then replace the
# remaining periods with spaces (e.g. "New.York.State" -> "New York State").
# fixed = TRUE makes gsub() treat the pattern as literal text; without it every
# "." is a regex wildcard that matches any character, not just a period.
df_long$Region <- gsub(".Average....gal.", "", df_long$Region, fixed = TRUE)
df_long$Region <- gsub(".", " ", df_long$Region, fixed = TRUE)

head(df_long)
## # A tibble: 6 × 3
##   Date       Region         `Average Price per gallon`
##   <date>     <chr>                               <dbl>
## 1 2023-02-27 New York State                       3.47
## 2 2023-02-27 Albany                               3.51
## 3 2023-02-27 Batavia                              3.38
## 4 2023-02-27 Binghamton                           3.41
## 5 2023-02-27 Buffalo                              3.39
## 6 2023-02-27 Dutchess                             3.56

#Looking just at Albany to see if everything is good so far

# Sanity check: scatter plot of the Albany rows only, to confirm the reshaped
# data looks reasonable before plotting every region.
ggplot(
  data = dplyr::filter(df_long, Region == "Albany"),
  mapping = aes(x = Date, y = `Average Price per gallon`)
) +
  geom_point()

#Plotting all the Regions individually with a line graph

# One line-chart panel per region.
# na.rm = TRUE is set on geom_line() only: that is the layer that drops missing
# prices silently. ggplot() has no na.rm parameter, so passing it there did
# nothing.
df_long %>%
  ggplot(aes(x = Date, y = `Average Price per gallon`, color = Region)) +
  geom_line(na.rm = TRUE) +
  facet_wrap(~Region)

#Plotting all the regions on top of each other

# All regions drawn on a single set of axes, one coloured line per region.
# na.rm = TRUE is set on geom_line() only: that is the layer that drops missing
# prices silently. ggplot() has no na.rm parameter, so passing it there did
# nothing.
df_long %>%
  ggplot(aes(x = Date, y = `Average Price per gallon`, color = Region)) +
  # Optional point layer, currently disabled:
  # geom_point(size = 0.5, alpha = 0.5, na.rm = TRUE) +
  geom_line(na.rm = TRUE) +
  theme_bw() +
  labs(
    title = "Average Gas Prices of 17 Regions in NY",
    y = "Average Price per Gallon"
  )

Looking at the graph, the average gas prices of the regions are pretty similar. They all follow the same ups and downs. Right before 2010, all the gas prices seem to have fallen significantly. This was probably due to the 2008 market crash. All the gas prices seem to rise quickly right after 2020; this could be attributed to everyone going back to work after the COVID pandemic. However, there is no obvious trend that I can see.