Questions : What is the relationship between sulfur levels and price? How does gravity affect price? What is the average price for sweet crude oil?

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(ggthemes)
rm(list = ls())
data <- read.csv("https://raw.githubusercontent.com/ArcticNick/Rdataset/main/USCrudes.csv")
summary(data)
##        X            price          gravity         sulphur     
##  Min.   : 1.0   Min.   :11.03   Min.   : 8.90   Min.   :0.100  
##  1st Qu.:25.5   1st Qu.:14.13   1st Qu.:17.40   1st Qu.:0.500  
##  Median :50.0   Median :15.75   Median :24.20   Median :0.900  
##  Mean   :50.0   Mean   :15.34   Mean   :24.38   Mean   :1.428  
##  3rd Qu.:74.5   3rd Qu.:16.69   3rd Qu.:31.65   3rd Qu.:1.800  
##  Max.   :99.0   Max.   :18.05   Max.   :40.80   Max.   :5.900
# rename the X column to US Crude and sulfur column to sulfur 
data <- rename(data, US_Crude = "X", sulfur = "sulphur")

head(data)
##   US_Crude price gravity sulfur
## 1        1 13.86    14.8    2.0
## 2        2 16.82    31.7    0.7
## 3        3 13.65    13.7    1.0
## 4        4 16.53    29.2    1.6
## 5        5 15.66    19.8    1.2
## 6        6 16.16    27.2    0.7

Context:

Gravity or American Petroleum Institute Gravity (API Gravity) is the measure of how heavy or light a petroleum liquid is compared to water. Generally, higher API gravity is considered lighter oils are more valuable.

Sulfur : less sulfur level are more valuable because sulfur is a contaminant and must be removed in order to refine the crude oil. The process of removing sulfur is expensive and energy-intensive. Crude oil with high sulfur is referred to as “sour” crude and low sulfur is “sweet” crude. Sweet crude is generally more in demand because it is easier and cost-efficient to refine into gasoline and diesel fuel.

# add a column that determine whether the sulfur and gravity level are either sweet or sour and high or low API gravity. Then rearranged the columns using select() so that gravity_level and sulfur_level are next to their respective values.

crude_data <- data %>% 
    mutate(gravity_level = if_else(gravity > 40, "high", if_else(gravity < 20, "low", "medium")), sulfur_level = if_else(sulfur < 0.5, "sweet", if_else(sulfur > 1, "sour", "medium"))) %>% 
    select(US_Crude, price, gravity, gravity_level, sulfur, sulfur_level)


head(crude_data, 10)
##    US_Crude price gravity gravity_level sulfur sulfur_level
## 1         1 13.86    14.8           low    2.0         sour
## 2         2 16.82    31.7        medium    0.7       medium
## 3         3 13.65    13.7           low    1.0       medium
## 4         4 16.53    29.2        medium    1.6         sour
## 5         5 15.66    19.8           low    1.2         sour
## 6         6 16.16    27.2        medium    0.7       medium
## 7         7 18.05    40.8          high    0.1        sweet
## 8         8 14.93    24.2        medium    1.7         sour
## 9         9 16.57    28.3        medium    1.2         sour
## 10       10 16.54    32.6        medium    0.5       medium
# Create subsets of each grade of US crude classifications...
premium_data <- crude_data  %>%
    filter(gravity_level == "high" & sulfur_level == "sweet")

lowgrade_data <- crude_data %>%
    filter(gravity_level == "low" & sulfur_level == "sour")

medium_data <- crude_data %>%
    filter((gravity_level == "medium" & sulfur_level == "medium") | (gravity_level == "high" & sulfur_level == "medium") | (gravity_level == "low" & sulfur_level == "medium") | (gravity_level == "medium" & sulfur_level == "sweet") | (gravity_level == "medium" & sulfur_level == "sour"))

sweet_data <- crude_data %>% 
    filter(sulfur_level == "sweet")
avg_price_sweet <- mean(sweet_data$price)

# show data frames
head(premium_data)
##   US_Crude price gravity gravity_level sulfur sulfur_level
## 1        7 18.05    40.8          high    0.1        sweet
head(medium_data)
##   US_Crude price gravity gravity_level sulfur sulfur_level
## 1        2 16.82    31.7        medium    0.7       medium
## 2        3 13.65    13.7           low    1.0       medium
## 3        4 16.53    29.2        medium    1.6         sour
## 4        6 16.16    27.2        medium    0.7       medium
## 5        8 14.93    24.2        medium    1.7         sour
## 6        9 16.57    28.3        medium    1.2         sour
head(lowgrade_data)
##   US_Crude price gravity gravity_level sulfur sulfur_level
## 1        1 13.86    14.8           low    2.0         sour
## 2        5 15.66    19.8           low    1.2         sour
## 3       12 14.44    15.7           low    4.3         sour
## 4       16 13.62    13.6           low    1.1         sour
## 5       19 13.16    11.3           low    1.7         sour
## 6       22 14.53    18.4           low    1.4         sour
mean_premium <- mean(premium_data$price)
mean_medium <- mean(medium_data$price)
mean_lowgrade <- mean(lowgrade_data$price)

mean_premium
## [1] 18.05
mean_medium
## [1] 15.94427
mean_lowgrade
## [1] 13.24
#Create scatter plot
crude_data %>% ggplot(aes(x = sulfur, y = price)) +
    geom_point(aes(color = sulfur_level)) + 
    geom_smooth(method = lm) + 
    ggtitle("Price to Sulfur Scatter Plot") + 
    labs(x = "Sulfur Values",
         y = "Price") +
    theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

# Create Scatterplot for Price to Gravity 
crude_data %>% ggplot(aes(x = gravity, y = price)) +
    geom_point(aes(color = gravity_level)) +
    geom_smooth(method = lm) +
    labs(x = "Gravity",
         y = "Price") +
    ggtitle("Price to Gravity Scatterplot") +
    theme_clean()
## `geom_smooth()` using formula = 'y ~ x'

# Create histogram
crude_data %>% ggplot(aes(x = gravity)) +
    geom_histogram(binwidth = 2, fill = "#97B3C6") +
    ggtitle("Histogram of Gravity Levels") +
    labs(x = "Gravity Level",
         y = NULL) +
    theme_bw()

# Create box plot
crude_data %>% ggplot(aes(x = gravity_level, y = price, fill = gravity_level)) +
    geom_boxplot() +
    labs(x = "Gravity",
         y = "Price") +
    geom_dotplot(binaxis = "y", 
                 stackdir = "center", 
                 dotsize = 1, 
                 alpha = 0.7) +
    ggtitle("Gravity to Price Boxplot") +
    theme_economist()
## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

#  calculates correlation
cor(crude_data$sulfur, crude_data$price)
## [1] -0.7722048
cor(crude_data$gravity, crude_data$price)
## [1] 0.9180818
Based on the data set crude_data, we see that there is negative correlation between sulfur levels and price. This means that as sulfur levels increase, the price of the crude oil decreases. This stems from the fact that crude oil with higher sulfur levels are considered to be low quality because it requires more energy and infrastructure to refine; thus making high sulfur crude oil less valuable in the market. On the other hand, we observe a strong positive correlation where price increase as gravity increases. This means that as the API gravity of the crude oil increases, the price also increases. This is a result from crude oil market that considers high API gravity to be better quality and thus more valuable. Furthermore, the average price of "sweet" crude oil is $19.94 which is sightly higher than the mean price of a medium-grade crude oil because low sulfur level crude oil also known as "sweet" is considered to be a higher quality and thus more valuable.