#1 Check your working directory.
getwd()
## [1] "/Users/zosiajiang/Desktop/Harrisburg Application - Zhuoxin Jiang/ANLY 565"
#2 Set the working directory (the assignment text names "ANLY 580/RScript"; the folder actually used is the ANLY 565 one below).
setwd(dir = "~/Desktop/Harrisburg Application - Zhuoxin Jiang/ANLY 565")
#3 Load the goy data set posted on Moodle and label it goy. The data set holds prices of gold and oil and the price of 1 US dollar in terms of Japanese yen. Set the first column to the date format and keep the remaining columns numeric.
library(readxl)
# The file is read relative to the working directory set above.
goy <- read_xls(path = "goy.xls")
dim(goy)
## [1] 879 4
summary(goy)
## observation_date gold oil
## Min. :1946-01-01 00:00:00 Min. : 34.95 Min. : 1.17
## 1st Qu.:1964-04-16 00:00:00 1st Qu.: 276.12 1st Qu.: 2.97
## Median :1982-08-01 00:00:00 Median : 379.80 Median : 16.70
## Mean :1982-08-01 05:44:01 Mean : 522.64 Mean : 24.82
## 3rd Qu.:2000-11-16 00:00:00 3rd Qu.: 630.95 3rd Qu.: 32.83
## Max. :2019-03-01 00:00:00 Max. :1780.65 Max. :133.93
## NA's :268 NA's :1
## yen
## Min. : 76.64
## 1st Qu.:108.11
## Median :122.87
## Mean :160.36
## 3rd Qu.:224.65
## Max. :358.02
## NA's :301
str(goy)
## tibble [879 × 4] (S3: tbl_df/tbl/data.frame)
## $ observation_date: POSIXct[1:879], format: "1946-01-01" "1946-02-01" ...
## $ gold : num [1:879] NA NA NA NA NA NA NA NA NA NA ...
## $ oil : num [1:879] 1.17 1.17 1.17 1.27 1.27 1.27 1.27 1.52 1.52 1.52 ...
## $ yen : num [1:879] NA NA NA NA NA NA NA NA NA NA ...
# read_xls() returned observation_date as POSIXct (see str() above); convert it to Date.
goy[["observation_date"]] <- as.Date(goy[["observation_date"]])
str(goy)
## tibble [879 × 4] (S3: tbl_df/tbl/data.frame)
## $ observation_date: Date[1:879], format: "1946-01-01" "1946-02-01" ...
## $ gold : num [1:879] NA NA NA NA NA NA NA NA NA NA ...
## $ oil : num [1:879] 1.17 1.17 1.17 1.27 1.27 1.27 1.27 1.52 1.52 1.52 ...
## $ yen : num [1:879] NA NA NA NA NA NA NA NA NA NA ...
#4 Create a new data set called "goycc" that contains all complete cases of the goy data. Utilize the complete.cases function.
# Keep only the rows in which every column is observed.
goycc <- goy[which(complete.cases(goy)), ]
# Sanity check: no missing values should remain.
sum(is.na(goycc))
## [1] 0
#5 Create a stand-alone variable "date" that takes on the values of the "observation_date" variable from the goycc data set. Set the mode of the variable to character.
date <- as.character(goycc[["observation_date"]])
head(x = date)
## [1] "1971-01-01" "1971-02-01" "1971-03-01" "1971-04-01" "1971-05-01"
## [6] "1971-06-01"
str(object = date)
## chr [1:578] "1971-01-01" "1971-02-01" "1971-03-01" "1971-04-01" ...
#6 Find the range of dates covered in the goycc data set by applying the range() function to the "date" variable.
# ISO "YYYY-MM-DD" strings sort chronologically, so range() on the character vector gives the first and last dates.
range(date)
## [1] "1971-01-01" "2019-02-01"
#7 Create a time series object called "goyccts" by utilizing the goycc data set and the ts() function. Exclude the first column of the goycc data set.
# View() opens a data viewer and needs an interactive session, so only call it
# when one is available; this keeps the script runnable under Rscript / knitr.
if (interactive()) {
  View(goycc)
}
# Drop the date column; the time index is carried by the ts attributes instead.
df <- goycc[, -1]
# Monthly series from January 1971 to February 2019 (the range found in #6).
# `frequency` is spelled out in full rather than relying on partial matching of `freq`.
goyccts <- ts(df, start = c(1971, 1), end = c(2019, 2), frequency = 12)
str(goyccts)
## Time-Series [1:578, 1:3] from 1971 to 2019: 37.9 38.7 38.9 39 40.5 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:3] "gold" "oil" "yen"
#8 Reassign the value of the yen variable in the goyccts data set by converting the exchange rate from the price of 1 US dollar in Japanese yen to the price of 1 yen in US dollars. This way an increase in the number represents an appreciation of the yen. Hint: reassign the yen variable by taking its reciprocal.
goyccts[, "yen"] <- 1 / goyccts[, "yen"]
head(goyccts)
## gold oil yen
## [1,] 37.86750 3.56 0.002793140
## [2,] 38.71600 3.56 0.002796851
## [3,] 38.87283 3.56 0.002797057
## [4,] 39.00100 3.56 0.002797178
## [5,] 40.49250 3.56 0.002797884
## [6,] 40.10477 3.56 0.002797893
#9 Plot the time series of the three assets. Do you see any trend? Do you see any seasonal component? Yes. Based on the plot below, gold, oil, and yen all show a positive trend.
plot(goyccts)
#10 Utilize the aggregate function to plot annual prices of the three assets. How does this graph differ from the monthly time series plot? Based on the plot below, the aggregated plot is smoother than the previous plot because it leaves out the seasonal effect.
# aggregate() on a ts defaults to FUN = sum, which would plot the yearly sum of
# the monthly prices. FUN = mean plots the annual average price instead; the
# shape of the curves is the same, only the scale is a price level again.
plot(aggregate(goyccts, FUN = mean))
#11 Find the average summer price of oil for the entire sample.
# Two-digit month ("01".."12") of every observation, computed once and reused
# for both seasonal filters.
obs_month <- format(goycc$observation_date, "%m")
# Define June, July, and August as summer months.
# %in% never yields NA, so a missing date cannot turn into an all-NA row.
summer <- goycc[obs_month %in% c("06", "07", "08"), ]
mean(summer$oil)
## [1] 37.26092
#12 Find the average winter price of oil for the entire sample.
# Define December, January, and February as winter months.
winter <- goycc[obs_month %in% c("12", "01", "02"), ]
mean(winter$oil)
## [1] 34.74591
#13 How does the summer price of oil compare to the winter price of oil? Please provide your answer in percentages. The summer price of oil is around 7% higher than the winter price of oil. The average summer oil price is about 103% of the overall average; the average winter oil price is about 96% of the overall average.
# Relative difference of the summer mean over the winter mean (as a fraction).
(mean(summer$oil) - mean(winter$oil)) / mean(winter$oil)
## [1] 0.07238298
# Seasonal means relative to the mean over the whole complete-case sample.
summer.ratio <- mean(summer$oil) / mean(goycc$oil)
summer.ratio
## [1] 1.027733
winter.ratio <- mean(winter$oil) / mean(goycc$oil)
winter.ratio
## [1] 0.9583637
#14 Use the window() function to create three stand-alone variables "gold", "oil", and "yen" that take on the values of the "gold", "oil", and "yen" variables from the goyccts data set starting from January 2005.
gold <- window(x = goyccts[, "gold"], start = c(2005, 1))
oil <- window(x = goyccts[, "oil"], start = c(2005, 1))
yen <- window(x = goyccts[, "yen"], start = c(2005, 1))
#15 Use the plot() and decompose() functions to generate three graphs that depict the observed values, trend, seasonal, and random components for the "gold", "oil", and "yen" variables. Would you choose a multiplicative or an additive decomposition model for each of the variables? I choose the simple additive decomposition model because the seasonal effect does not increase as the trend increases in this case.
# Fit the three additive decompositions first ...
gold_dec <- decompose(x = gold, type = "additive")
oil_dec <- decompose(x = oil, type = "additive")
yen_dec <- decompose(x = yen, type = "additive")
# ... then draw each raw series followed by its decomposition, in the same
# gold / oil / yen order as before.
plot(gold)
plot(gold_dec)
plot(oil)
plot(oil_dec)
plot(yen)
plot(yen_dec)
#16 For each of the variables, extract the random component and save it as "goldrand", "oilrand", and "yenrand". Use the na.omit() function to deal with the missing values.
goldrand <- na.omit(gold_dec[["random"]])
oilrand <- na.omit(oil_dec[["random"]])
yenrand <- na.omit(yen_dec[["random"]])
#17 For the random component of each asset, estimate the autocorrelation function. Do any of the assets exhibit autocorrelation? If yes, to what degree? Keep in mind that there are missing values.
Gold, oil, and yen all exhibit autocorrelation.
For gold, the autocorrelation is significant at lags below 0.7. For oil, it is significant at lags below 0.7 and at lag 1.6. For yen, it is significant at every lag except 0.6, 1.1, and 1.6. (Lags are measured in years, because the series are monthly with frequency 12.)
# Correlogram of the random (detrended, deseasonalised) component of each asset.
acf(x = goldrand, plot = TRUE)
acf(x = oilrand, plot = TRUE)
acf(x = yenrand, plot = TRUE)
#18 For all possible pairs of assets, estimate the cross-correlation function. Do any of the variables lead or precede each other? Could you use any of the variables to predict values of the other variables? Make sure to use detrended and seasonally adjusted variables ("goldrand", "oilrand", and "yenrand").
No single variable consistently leads the others; the leading variable changes from one time period to the next. Because the lead-lag relationship is not stable, we cannot use one variable to predict the others.
# For every pair of random components: overlay the two series (first in red,
# second in blue), then estimate their cross-correlation function.
ts.plot(goldrand, oilrand, gpars = list(col = c("red", "blue")))
ccf(x = goldrand, y = oilrand)
ts.plot(goldrand, yenrand, gpars = list(col = c("red", "blue")))
ccf(x = goldrand, y = yenrand)
ts.plot(oilrand, yenrand, gpars = list(col = c("red", "blue")))
ccf(x = oilrand, y = yenrand)
#19 Based on the time series plots of gold, oil, and yen prices, there appear to be no systematic trends or seasonal effects. Therefore, it is reasonable to use exponential smoothing for these time series. Estimate alpha, the smoothing parameter, for gold, oil, and yen. What does the value of alpha tell you about the behavior of the mean? What is the estimated value of the mean for each asset?
For gold, alpha is 0.9999196, which means that there is little smoothing; the estimated mean is 1317.81. For oil, alpha is 0.9999373, which means that there is little smoothing; the estimated mean is 60.42. For yen, alpha is 0.9999335, which means that there is little smoothing; the estimated mean is 0.0092.
# Simple exponential smoothing via HoltWinters(): beta = FALSE drops the trend
# term and gamma = FALSE drops the seasonal term, so only the level (alpha) is
# estimated. Passing beta = 0 / gamma = 0 instead fits the full trend +
# additive-seasonal model with those two smoothing parameters pinned at zero.
# NOTE: the console output recorded here earlier (alpha of about 0.9999 together
# with b and s1..s12 coefficients, under the header "Holt-Winters exponential
# smoothing with trend and additive seasonal component") came from that
# trend + seasonal fit. Re-run this section to regenerate the printed output and
# the alpha / mean figures quoted in the written answer to #19.
gold.hw <- HoltWinters(gold, beta = FALSE, gamma = FALSE)
gold.hw
plot(gold.hw)
oil.hw <- HoltWinters(oil, beta = FALSE, gamma = FALSE)
oil.hw
plot(oil.hw)
yen.hw <- HoltWinters(yen, beta = FALSE, gamma = FALSE)
yen.hw
plot(yen.hw)
#20 Use the plot() function to generate three graphs that depict the observed and exponentially smoothed values for each asset.
plot(x = gold.hw)
plot(x = oil.hw)
plot(x = yen.hw)
#21 Use the window() function to create 3 new variables called "goldpre", "oilpre", and "yenpre" that cover the period from January 2005 until August 2018.
# Estimation ("pre") sample.
goldpre <- window(x = gold, start = c(2005, 1), end = c(2018, 8))
oilpre <- window(x = oil, start = c(2005, 1), end = c(2018, 8))
yenpre <- window(x = yen, start = c(2005, 1), end = c(2018, 8))
#22 Use the window() function to create 3 new variables called goldpost, oilpost, and yenpost that cover the period from September 2018 until February 2019.
# Hold-out ("post") sample: six months.
goldpost <- window(x = gold, start = c(2018, 9), end = c(2019, 2))
oilpost <- window(x = oil, start = c(2018, 9), end = c(2019, 2))
yenpost <- window(x = yen, start = c(2018, 9), end = c(2019, 2))
#23 Estimate a HoltWinters filter model for each asset, using only the pre data. Save these estimates as "gold.hw", "oil.hw", and "yen.hw".
For gold.hw, alpha is 0.86 and the estimated mean is 1198.19. For oil.hw, alpha is 1 and the estimated mean is 62.89. For yen.hw, alpha is 0.86 and the estimated mean is 0.00873.
# Additive Holt-Winters fits on the estimation ("pre") sample only.
gold.hw <- HoltWinters(x = goldpre, seasonal = "additive")
oil.hw <- HoltWinters(x = oilpre, seasonal = "additive")
yen.hw <- HoltWinters(x = yenpre, seasonal = "additive")
#24 Use the HoltWinters filter estimates generated in #23 and the predict() function to create a 6-month-ahead forecast of the gold, oil, and yen prices. Save these forecasted values as "goldforc", "oilforc", and "yenforc".
goldforc <- predict(object = gold.hw, n.ahead = 6)
oilforc <- predict(object = oil.hw, n.ahead = 6)
yenforc <- predict(object = yen.hw, n.ahead = 6)
#25 Use the ts.plot() function to plot side by side the post-sample prices ("goldpost", "oilpost", "yenpost") and their forecasted counterparts. Designate the red color to represent the actual prices and blue dotted lines to represent the forecasted values.
# One row, three panels: actual prices as a solid red line (lty 1), forecasts
# as a blue line with lty 2.
par(mfrow = c(1, 3))
plot1 <- ts.plot(
  goldpost, goldforc,
  lty = 1:2, col = c("red", "blue"), ylim = c(1050, 1320)
)
plot2 <- ts.plot(oilpost, oilforc, lty = 1:2, col = c("red", "blue"))
plot3 <- ts.plot(yenpost, yenforc, lty = 1:2, col = c("red", "blue"))
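#26 Calculate the forecast mean percentage error for each asset's forecasting model. Which asset's forecasting model has the lowest mean percentage error?
Gold: 5.98%. Oil: -7.14% (the lowest signed value). Yen: 3.70% (the smallest in absolute value, i.e. the forecast closest to the actual prices on average).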
# Mean percentage error over the six hold-out months:
# mean of (actual - forecast) / actual, expressed in percent.
# Wrapping an assignment in parentheses stores the value and shows it.
(gold_per <- mean(((goldpost - goldforc) / goldpost) * 100))
## [1] 5.98464
(oil_per <- mean(((oilpost - oilforc) / oilpost) * 100))
## [1] -7.141958
(yen_per <- mean(((yenpost - yenforc) / yenpost) * 100))
## [1] 3.700225
#27 Use the gold, oil, and yen variables to estimate a HoltWinters model for each asset. Save these estimates as "goldc.hw", "oilc.hw", and "yenc.hw".
# Same additive Holt-Winters specification, now fitted on the full series.
goldc.hw <- HoltWinters(x = gold, seasonal = "additive")
oilc.hw <- HoltWinters(x = oil, seasonal = "additive")
yenc.hw <- HoltWinters(x = yen, seasonal = "additive")
#28 Use the "goldc.hw", "oilc.hw", and "yenc.hw" models to create out-of-sample forecasts that predict the prices of each of the assets for the rest of 2019. Save these forecasts as "goldforcos", "oilforcos", and "yenforcos". What is the forecasted price of gold for November 2019?
The forecasted price of gold for November 2019 is 1276.936 (in the units of the gold price series, presumably US dollars rather than yen).
# Ten-step-ahead forecasts from the full-sample Holt-Winters fits. With the
# series ending in February 2019 (as constructed earlier in this script), the
# ten steps run from March 2019 through December 2019.
goldforcoc <- predict(goldc.hw, n.ahead = 10)
oilforcoc <- predict(oilc.hw, n.ahead = 10)
yenforcoc <- predict(yenc.hw, n.ahead = 10)
# Extract the November 2019 gold forecast by date. Indexing the forecast with
# ['Nov'] looks up an element *named* "Nov"; the forecast series carries no
# such names, so that lookup yields NA rather than the November value.
goldforcoc_10 <- as.numeric(
  window(goldforcoc, start = c(2019, 11), end = c(2019, 11))
)
#29 Create time series plots for each asset that combine the actual price data of each asset and their out-of-sample forecasted values. Designate the red color to represent the actual prices and blue dotted lines to represent the forecasted values. What do you think will happen to the price of each asset by the end of the year?
The prices of the three assets will drop by the end of the year.
# One row, three panels: the observed series in red followed by its
# out-of-sample forecast in blue. The graphics parameters are handed to
# ts.plot() through its gpars list.
par(mfrow = c(1, 3))
plot4 <- ts.plot(gold, goldforcoc, gpars = list(lty = 1:4, col = c("red", "blue")))
plot5 <- ts.plot(oil, oilforcoc, gpars = list(lty = 1:4, col = c("red", "blue")))
plot6 <- ts.plot(yen, yenforcoc, gpars = list(lty = 1:4, col = c("red", "blue")))
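#30 Calculate the percentage change between the price of each asset in February 2019 and its forecasted December 2019 price. Which asset promises the highest rate of return?
Relative to its February 2019 price, the forecasted December 2019 price of gold is about 3.83% lower, the price of oil is about 5.55% higher (the highest rate of return), and the price of yen is about 0.08% higher. These figures are December/February - 1, i.e. the reciprocal of the February/December ratios printed below (1.039843, 0.9474179, and 0.9992394) minus 1.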
# Percentage change from the February 2019 price to the forecasted December
# 2019 price: (december - february) / february * 100. A plain february /
# december ratio is not a rate of return and is easy to misread (for example
# 0.9474 is a 5.55% rise from February, not 5.26%).
# The recorded values below are derived from the February/December ratios that
# were printed by the earlier version of this section; re-run to refresh them.
# NOTE: gold_dec / oil_dec / yen_dec are reused here for the December forecasts
# and therefore replace the decompose() results stored under the same names in #15.
gold_feb <- window(gold, start = c(2019, 2))[[1]]  # last observed price (Feb 2019)
gold_dec <- goldforcoc[[10]]                       # 10th forecast step (Dec 2019)
gold_perc <- (gold_dec - gold_feb) / gold_feb * 100
gold_perc # decrease of about 3.83%.
## approx. -3.83 (derived from the previously recorded ratio 1.039843)
oil_feb <- window(oil, start = c(2019, 2))[[1]]
oil_dec <- oilforcoc[[10]]
oil_perc <- (oil_dec - oil_feb) / oil_feb * 100
oil_perc # increase of about 5.55% (highest rate of return).
## approx. 5.55 (derived from the previously recorded ratio 0.9474179)
yen_feb <- window(yen, start = c(2019, 2))[[1]]
yen_dec <- yenforcoc[[10]]
yen_perc <- (yen_dec - yen_feb) / yen_feb * 100
yen_perc # increase of about 0.08%.
## approx. 0.08 (derived from the previously recorded ratio 0.9992394)