Topic: Correlation between Gold and Silver Prices

Introduction:

Silver and gold are close substitutes for one another. Both have been used as currency in the pas. There is significant evidence that these metals being an attractive investment and can play a useful role in diversifying risk. There are also economic fundamentals that may act to drive the prices of silver and gold apart.

Research question:

The main objective of this project is to study the relationship and correlation between prices of silver and gold in commodity market.

Cases:

Each case represents the price of silver and gold for a day. The full dataset represents data for 48 years with approximately 12618 cases.

Data Collection:

The data is collected from quandl.com

Type of study:

This is observational study.

Response:

Price of gold is the response variable. It is numerical continuous variable.

Explanatory:

The explanatory variable is the price of silver and is numerical.

Setup

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Could not load tcltk.  Will use slower R code instead.
## Loading required package: RSQLite
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(DT)

Silver and Gold price data are loaded from the datasets

gold_df <- read.csv(file="GOLD.csv", head=TRUE, sep=",",stringsAsFactors = FALSE) 
datatable(head(gold_df))
silver_df = read.csv(file="SILVER.csv", head=TRUE, sep=",",stringsAsFactors = FALSE)
datatable(head(silver_df))

Data Transformation, Cleanup and Preparation:

Apply tidyr select function and select only columns (Date and USD..AM Price) form gold_df data set that are relevant for our purpose

select(gold_df)
## data frame with 0 columns and 12618 rows
gold_dataSet = select (gold_df, matches("Date|USD..AM"))

Apply tidyr mutate functin to identify the price of the metal as gold

gold_dataSet = mutate (gold_dataSet, MetalPrice = "GoldPrice")
colnames(gold_dataSet)[2] <- "USD"
datatable(head(gold_dataSet))

Apply tidyr select function and select only columns (Date and USD Price) form silver_df data set that are relevant for our purpose. Also Apply tidyr mutate function to identify the price of the metal as Silver

select(silver_df)
## data frame with 0 columns and 12628 rows
Silver_dataSet = select (silver_df, matches("Date|USD"))
silver_df$Date = as.Date(as.character(silver_df$Date))
## Warning in strptime(xx, f <- "%Y-%m-%d", tz = "GMT"): unknown timezone
## 'zone/tz/2018c.1.0/zoneinfo/America/New_York'
Silver_dataSet = mutate (Silver_dataSet, MetalPrice = "SilverPrice")
datatable(head(Silver_dataSet))

Now combine both data frames (Silver and Gold price data frames) into one consolidated data frame for analysis

Silver_dataSet <- Silver_dataSet[1:nrow(gold_dataSet),1:3]
gold_dataSet <- gold_dataSet[,1:3]
#do.call(rbind, gold_dataSet)
combinedDataSets = rbind(gold_dataSet, Silver_dataSet)
datatable(head(combinedDataSets))

Apply arrange function to sort the data by Date

combinedDataSets = arrange(combinedDataSets, Date)
datatable(head(combinedDataSets))

Apply spread function to spread the data for each metal by adding one column for Gold and another for Silver

spreadedTotal = spread(combinedDataSets, MetalPrice, USD)
head(spreadedTotal, 20)
##          Date GoldPrice SilverPrice
## 1  1968-01-02     35.18          NA
## 2  1968-01-03     35.16          NA
## 3  1968-01-04     35.14          NA
## 4  1968-01-05     35.14          NA
## 5  1968-01-08     35.14          NA
## 6  1968-01-09     35.14          NA
## 7  1968-01-10     35.15          NA
## 8  1968-01-11     35.17          NA
## 9  1968-01-12     35.18          NA
## 10 1968-01-15     35.18          NA
## 11 1968-01-16     35.19       2.069
## 12 1968-01-17     35.20       2.067
## 13 1968-01-18     35.20       2.058
## 14 1968-01-19     35.19       2.078
## 15 1968-01-22     35.19       2.088
## 16 1968-01-23     35.19       2.080
## 17 1968-01-24     35.20       2.064
## 18 1968-01-25     35.20       2.018
## 19 1968-01-26     35.20       2.069
## 20 1968-01-29     35.19       2.039

Create a function to calculate the price ratio and round it to 4 decimal digits

calcPriceRatio = function(goldPrice, silverPrice)
{
  round((goldPrice/silverPrice), 4)
  }

Apply tidyr mutate function to add PriceRatio and transactionYear columns

dataSetsWithPriceRatio = mutate(spreadedTotal, PriceRatio=calcPriceRatio(GoldPrice, SilverPrice), transactionYear=substring(Date,1,4))
tail(dataSetsWithPriceRatio, 20)
##             Date GoldPrice SilverPrice PriceRatio transactionYear
## 12611 2017-11-03   1275.30      17.085    74.6444            2017
## 12612 2017-11-06   1271.60      16.915    75.1759            2017
## 12613 2017-11-07   1276.35      17.005    75.0573            2017
## 12614 2017-11-08   1282.25      16.995    75.4487            2017
## 12615 2017-11-09   1284.00      17.100    75.0877            2017
## 12616 2017-11-10   1284.45      17.000    75.5559            2017
## 12617 2017-11-13   1278.40      16.925    75.5332            2017
## 12618 2017-11-14   1273.70      16.935    75.2111            2017
## 12619 2017-11-15   1285.70      17.115    75.1212            2017
## 12620 2017-11-16   1277.70      17.040    74.9824            2017
## 12621 2017-11-17   1283.85      17.085    75.1449            2017
## 12622 2017-11-20   1292.35      17.145    75.3777            2017
## 12623 2017-11-21   1280.00      17.000    75.2941            2017
## 12624 2017-11-22   1283.95      16.965    75.6823            2017
## 12625 2017-11-23   1290.15      17.095    75.4694            2017
## 12626 2017-11-24   1289.15      17.050    75.6100            2017
## 12627 2017-11-27   1294.70      17.100    75.7135            2017
## 12628 2017-11-28   1293.90      17.070    75.7996            2017
## 12629 2017-11-29   1294.85      16.895    76.6410            2017
## 12630 2017-11-30   1282.15      16.570    77.3778            2017

Select price data for years from 2001 to 2013

PriceDataYear_2001_2013 = filter(dataSetsWithPriceRatio, transactionYear %in% c("2001", "2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013"))
tail(PriceDataYear_2001_2013, 20)
##            Date GoldPrice SilverPrice PriceRatio transactionYear
## 3266 2013-12-02   1237.50       19.75    62.6582            2013
## 3267 2013-12-03   1219.00       19.17    63.5889            2013
## 3268 2013-12-04   1213.00       19.05    63.6745            2013
## 3269 2013-12-05   1234.00       19.46    63.4121            2013
## 3270 2013-12-06   1230.75       19.49    63.1478            2013
## 3271 2013-12-09   1228.50       19.50    63.0000            2013
## 3272 2013-12-10   1245.75       20.05    62.1322            2013
## 3273 2013-12-11   1255.25       20.39    61.5620            2013
## 3274 2013-12-12   1243.50       19.80    62.8030            2013
## 3275 2013-12-13   1222.75       19.55    62.5448            2013
## 3276 2013-12-16   1229.50       19.50    63.0513            2013
## 3277 2013-12-17   1237.25       20.02    61.8007            2013
## 3278 2013-12-18   1233.25       19.94    61.8480            2013
## 3279 2013-12-19   1205.25       19.34    62.3190            2013
## 3280 2013-12-20   1195.00       19.33    61.8210            2013
## 3281 2013-12-23   1192.75       19.37    61.5772            2013
## 3282 2013-12-24   1196.50       19.40    61.6753            2013
## 3283 2013-12-27   1209.25       19.92    60.7053            2013
## 3284 2013-12-30   1201.50       19.65    61.1450            2013
## 3285 2013-12-31   1201.50       19.50    61.6154            2013
summary(PriceDataYear_2001_2013)
##      Date             GoldPrice       SilverPrice       PriceRatio   
##  Length:3285        Min.   : 256.7   Min.   : 4.065   Min.   :31.44  
##  Class :character   1st Qu.: 399.3   1st Qu.: 6.200   1st Qu.:53.19  
##  Mode  :character   Median : 673.6   Median :12.850   Median :60.22  
##                     Mean   : 832.2   Mean   :14.807   Mean   :60.04  
##                     3rd Qu.:1243.0   3rd Qu.:19.650   3rd Qu.:65.73  
##                     Max.   :1896.5   Max.   :48.700   Max.   :83.79  
##  transactionYear   
##  Length:3285       
##  Class :character  
##  Mode  :character  
##                    
##                    
## 

Data Visualization:

Gold Price Data

ggplot (gold_df, aes(as.Date(Date), USD..AM.)) +
  geom_line (aes(color="Gold")) +
  labs (color="Legend") +
  scale_colour_manual ("", breaks = c("gold"), values = c("goldenrod3")) +
  ggtitle ("Gold Prices") + theme (plot.title = element_text(lineheight=0.7, face="bold"))

Gold Price Histogram

ggplot(data=gold_df, aes(gold_df$USD..AM.)) +
  geom_histogram(aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "yellow", high = "goldenrod3")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).

Silver Price Data

ggplot (silver_df, aes(as.Date(Date), USD)) +
  geom_line (aes(color="Silver")) +
  labs (color="Legend") +
  scale_colour_manual ("", breaks = c("silver"), values = c("ivory4")) +
  ggtitle ("Silver Prices") +
  theme (plot.title = element_text(lineheight=0.7, face="bold"))

Silver Price Histogram

ggplot(data=silver_df, aes(silver_df$USD)) +
  geom_histogram(aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "grey", high = "ivory4")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 19 rows containing non-finite values (stat_bin).

Gold/Silver Price Ratio Data

ggplot (PriceDataYear_2001_2013, aes(as.Date(Date), PriceRatio)) +
  geom_line (aes(color="Blue")) +
  labs (color="Legend") +
  scale_colour_manual ("", breaks = c("gold"), values = c("goldenrod3")) +
  ggtitle ("Gold and Silver Price Ratio") +
  theme (plot.title = element_text(lineheight=0.7, face="bold"))

Gold/Silver Price Ratio Histogram

ggplot(data=PriceDataYear_2001_2013, aes(PriceDataYear_2001_2013$PriceRatio)) +
  geom_histogram(aes(fill = ..count..)) +
  scale_fill_gradient("Count", low = "grey", high = "blue") +
  ggtitle ("Gold and Silver Price Ratio Histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.