library(readxl)
#1  Check your working directory

getwd()
## [1] "/Users/krupakarekar/ANLY 565/RScipt"
#2  Set your working directory to "ANLY 580/RScript"
# NOTE: this path is machine-specific; the relative read of 'goy.xls' below
# depends on it. The literal previously contained "\ ", which is not a valid
# escape sequence in an R string and stops the file from parsing. A space
# inside a quoted path needs no escaping.

setwd("/Users/krupakarekar/ANLY 565/RScipt")
#3  Download the goy data set posted on Moodle and label it goy. This data set represents daily prices of gold, oil, and the price of 1 US dollar in terms of Japanese yen. Set the first column in each data set to the date format and the remaining columns in numerical format.

# Read the spreadsheet from the working directory and inspect its shape.
goy <- read_xls("goy.xls")
dim(goy)
## [1] 879   4
str(goy)
## tibble [879 × 4] (S3: tbl_df/tbl/data.frame)
##  $ observation_date: POSIXct[1:879], format: "1946-01-01" "1946-02-01" ...
##  $ gold            : num [1:879] NA NA NA NA NA NA NA NA NA NA ...
##  $ oil             : num [1:879] 1.17 1.17 1.17 1.27 1.27 1.27 1.27 1.52 1.52 1.52 ...
##  $ yen             : num [1:879] NA NA NA NA NA NA NA NA NA NA ...
# Change the format of the first column from POSIXct to Date.
# The three price columns are already numeric (see the str() output above).

goy[["observation_date"]] <- as.Date(goy[["observation_date"]])

str(goy)
## tibble [879 × 4] (S3: tbl_df/tbl/data.frame)
##  $ observation_date: Date[1:879], format: "1946-01-01" "1946-02-01" ...
##  $ gold            : num [1:879] NA NA NA NA NA NA NA NA NA NA ...
##  $ oil             : num [1:879] 1.17 1.17 1.17 1.27 1.27 1.27 1.27 1.52 1.52 1.52 ...
##  $ yen             : num [1:879] NA NA NA NA NA NA NA NA NA NA ...
#4  Create a new data set called "goycc" that contains all complete cases of goy data. Utilize complete.cases function.

# Keep only the rows in which every column is observed, then confirm that no
# missing values remain.
goycc <- goy[complete.cases(goy), ]
sum(is.na(goycc))
## [1] 0
#5  Create a stand-alone variable "date" that takes on values of "observation_date"

# Stored as character strings in ISO "YYYY-MM-DD" form (see the str() output).
# NOTE: the variable name shadows base::date(); the name comes from the task.
date <- as.character(goycc[["observation_date"]])
str(date)
##  chr [1:578] "1971-01-01" "1971-02-01" "1971-03-01" "1971-04-01" ...
#6  Find the range of dates covered in goycc data set by applying range() function to "date" variable.

# On ISO-formatted strings the alphabetical range is also the chronological one.
range(date)
## [1] "1971-01-01" "2019-02-01"
#7  Create a time series object called "goyccts" by utilizing goycc dataset and ts() function. In this dataset please exclude the first column of the goycc dataset.

# Drop the date column; the ts() index carries the time information instead.
# start/end match the date range found in step #6, at 12 observations a year.
df <- goycc[, -1]
goyccts <- ts(df, start = c(1971, 1), end = c(2019, 2), frequency = 12)
str(goyccts)
##  Time-Series [1:578, 1:3] from 1971 to 2019: 37.9 38.7 38.9 39 40.5 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:3] "gold" "oil" "yen"
#8  Reassign the value of the yen variable from the goyccts data set by converting the exchange rate of yen that represents the price of 1 US Dollar in terms of Japanese yen to represent the price of 1 Yen in terms of US Dollar. This way if the number increases it represents appreciation of Yen. Hint: Reassign the value of yen variable by taking a reciprocal.

# Yen per dollar -> dollars per yen.
goyccts[, "yen"] <- 1 / goyccts[, "yen"]
head(goyccts)
##          gold  oil         yen
## [1,] 37.86750 3.56 0.002793140
## [2,] 38.71600 3.56 0.002796851
## [3,] 38.87283 3.56 0.002797057
## [4,] 39.00100 3.56 0.002797178
## [5,] 40.49250 3.56 0.002797884
## [6,] 40.10477 3.56 0.002797893
#9  Plot the time series plot of the three assets. Do you see any trend? Do you see any seasonal component?
plot(goyccts)

#10 Utilize the aggregate function to plot annual prices of the three assets. How does this graph differ from the monthly time series plot?
# aggregate() on a ts defaults to FUN = sum, which would plot the sum of the
# twelve monthly prices rather than a price. Averaging within each year keeps
# the annual series on the original price scale.
plot(aggregate(goyccts, FUN = mean))

#11 Find the average summer price of oil for the entire sample.
# Summer is defined as June, July, and August ("%m" gives the two-digit month).
summer <- goycc[format(goycc$observation_date, "%m") %in% c("06", "07", "08"), ]
mean(summer[["oil"]])
## [1] 37.26092
#12 Find the average winter price of oil for the entire sample.
# Winter is defined as December, January, and February.
winter <- goycc[format(goycc$observation_date, "%m") %in% c("12", "01", "02"), ]
mean(winter[["oil"]])
## [1] 34.74591
#13 How does the summer price of oil compare to the winter price of oil.
#   Please provide your answer in percentages.
# Relative difference, measured against the winter average (printed as a fraction).
(mean(summer[["oil"]]) - mean(winter[["oil"]])) / mean(winter[["oil"]])
## [1] 0.07238298
# The average summer price of oil is 7.24% higher than the average winter price of oil.
#14 Use window() function to create three stand-alone variables "gold", "oil", and "yen" that take on values of the "gold", "oil", and "yen" variables from the goyccts dataset starting from January of 2005

# No `end` is given, so each series runs to the last observation in goyccts.
gold <- window(goyccts[, "gold"], start = c(2005, 1))
oil <- window(goyccts[, "oil"], start = c(2005, 1))
yen <- window(goyccts[, "yen"], start = c(2005, 1))
#15 Use plot() and decompose() functions to generate three graphs that would depict the observed values, trends, seasonal, and random components for "gold", "oil" and "yen" variables. Would you choose multiplicative or additive decomposition model for each of the variables?

# For each asset: plot the raw series, then its additive decomposition
# (observed, trend, seasonal and random panels).

# Gold
plot(gold)

gold_dec <- decompose(gold, type = "additive")
plot(gold_dec)

# Oil
plot(oil)

oil_dec <- decompose(oil, type = "additive")
plot(oil_dec)

# Yen
plot(yen)

yen_dec <- decompose(yen, type = "additive")
plot(yen_dec)

#16 For each of the variables extract the random component and save them as "goldrand", "oilrand", and "yenrand". Moreover, use na.omit() function to deal with the missing values.

# na.omit() removes the missing values from each random component before
# the correlation functions are estimated.
goldrand <- na.omit(gold_dec[["random"]])
oilrand <- na.omit(oil_dec[["random"]])
yenrand <- na.omit(yen_dec[["random"]])

#17 For the random component of each of the assets, please estimate autocorrelation function. Does any of the assets exhibit autocorrelation? If yes, to what degree? Keep in mind there are missing values.

acf(goldrand)

acf(oilrand)

acf(yenrand)

# Yes, all three (gold, oil, and yen) exhibit autocorrelation.

# Gold shows higher autocorrelation for lag values below 0.7. Oil shows high autocorrelation for lag values below 0.7 and at a lag value of 1.6.
#18 For all possible pairs of assets please estimate cross-correlation function. Do any of the variables lead or precede each other? Could you use any of the variables to predict values of other variables? Make sure to use detrended and seasonally adjusted variables. ("goldrand", "oilrand", and "yenrand")

# For each pair: overlay the two random components (first series red,
# second series blue), then estimate their cross-correlation function.

# Gold vs oil
ts.plot(goldrand, oilrand, col = c("red", "blue"))

ccf(goldrand, oilrand)

# Gold vs yen
ts.plot(goldrand, yenrand, col = c("red", "blue"))

ccf(goldrand, yenrand)

# Oil vs yen
ts.plot(oilrand, yenrand, col = c("red", "blue"))

ccf(oilrand, yenrand)

# The leading variable changes over the period, which makes it difficult to use one variable to predict the others.
#19 Based on the time series plot of gold, oil, and yen prices, there appears to be no systematic trends or seasonal effects. Therefore, it is reasonable to use exponential smoothing for these time series. Estimate alpha, the smoothing parameter for gold, oil and yen. What does the value of alpha tell you about the behavior of the mean? What is the estimated value of the mean for each asset?

# Simple exponential smoothing: beta = FALSE drops the trend term and
# gamma = FALSE drops the seasonal term, so only alpha (the level) is
# estimated. Passing the numeric value 0 instead, as this script did before,
# keeps both components in the model with their smoothing parameters fixed at
# 0: the earlier printouts were headed "exponential smoothing with trend and
# additive seasonal component" and listed b and s1..s12 coefficients.
gold.hw <- HoltWinters(gold, beta = FALSE, gamma = FALSE)
gold.hw

oil.hw <- HoltWinters(oil, beta = FALSE, gamma = FALSE)
oil.hw

yen.hw <- HoltWinters(yen, beta = FALSE, gamma = FALSE)
yen.hw

# Previously recorded results, from the beta = 0 / gamma = 0 fits (i.e. NOT
# simple exponential smoothing):
#   gold: alpha = 0.9999196, level a = 1317.81
#   oil:  alpha = 0.9999373, level a = 60.42
#   yen:  alpha = 0.9999335, level a = 0.0092
# TODO: rerun and record alpha and coefficient "a" (the estimated mean) from
# the fits above.
# An alpha close to 1 means the estimated mean follows the most recent
# observation almost exactly, i.e. there is little smoothing.
#20 Use plot() function to generate three graphs that depict observed and exponentially smoothed values for each asset.

plot(gold.hw)

plot(oil.hw)

plot(yen.hw)

#21 Use window() function to create 3 new variables called "goldpre", "oilpre", and "yenpre" that cover the period from January 2005 until August 2018.

# Estimation ("pre") sample.
goldpre <- window(gold, start = c(2005, 1), end = c(2018, 8))
oilpre <- window(oil, start = c(2005, 1), end = c(2018, 8))
yenpre <- window(yen, start = c(2005, 1), end = c(2018, 8))
#22 Use window() function to create 3 new variables called goldpost, oilpost, and yenpost that cover the period from September 2018 until February 2019.

# Hold-out ("post") sample: the six months that follow the pre sample.
goldpost <- window(gold, start = c(2018, 9), end = c(2019, 2))
oilpost <- window(oil, start = c(2018, 9), end = c(2019, 2))
yenpost <- window(yen, start = c(2018, 9), end = c(2019, 2))
#23 Estimate HoltWinters filter model for each asset, while using only pre data. Save each of these estimates as "gold.hw", "oil.hw", and "yen.hw".

# Holt-Winters fit on the pre-sample gold series with an additive seasonal
# component; no smoothing parameter is fixed in the call.
# Wrapping the assignment in parentheses also prints the fit.
(gold.hw <- HoltWinters(goldpre, seasonal = "additive"))
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = goldpre, seasonal = "additive")
## 
## Smoothing parameters:
##  alpha: 0.8572867
##  beta : 0.03101722
##  gamma: 1
## 
## Coefficients:
##             [,1]
## a   1198.1930522
## b     -2.1675372
## s1     2.5061057
## s2   -26.8815961
## s3   -38.8898693
## s4   -51.7983177
## s5    -5.7212105
## s6    18.1013981
## s7     8.2484540
## s8    -0.5993937
## s9    -3.5512328
## s10    7.2344144
## s11   -1.1028923
## s12    3.6660387
# Same additive Holt-Winters specification for the pre-sample oil series;
# the parentheses print the fit right after assigning it.
(oil.hw <- HoltWinters(oilpre, seasonal = "additive"))
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = oilpre, seasonal = "additive")
## 
## Smoothing parameters:
##  alpha: 1
##  beta : 0.005599748
##  gamma: 0
## 
## Coefficients:
##           [,1]
## a   62.8921181
## b    0.3491132
## s1   4.8407986
## s2   0.5891319
## s3  -5.0542014
## s4  -5.4162847
## s5  -0.6033681
## s6  -5.4746181
## s7  -4.4704514
## s8   2.5357986
## s9   3.8866319
## s10  3.7537153
## s11  0.2449653
## s12  5.1678819
# Same additive Holt-Winters specification for the pre-sample yen series;
# the parentheses print the fit right after assigning it.
(yen.hw <- HoltWinters(yenpre, seasonal = "additive"))
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = yenpre, seasonal = "additive")
## 
## Smoothing parameters:
##  alpha: 0.8612286
##  beta : 0.06008427
##  gamma: 1
## 
## Coefficients:
##              [,1]
## a    8.732109e-03
## b   -2.194822e-05
## s1   1.981890e-04
## s2   1.498968e-04
## s3  -5.760429e-05
## s4  -2.263830e-04
## s5  -1.381907e-04
## s6  -1.024081e-04
## s7  -1.578987e-04
## s8  -1.493823e-04
## s9  -7.458267e-05
## s10  1.494413e-04
## s11  1.835743e-04
## s12  2.771825e-04
#24 Use HoltWinters filter estimates generated in #23 and predict() function
#   to create a 6 month ahead forecast of the gold, oil, and yen prices.
#   Save these forecasted values as "goldforc", "oilforc", and "yenforc".

# Six steps ahead of the pre sample (which ends August 2018) lines up with
# the September 2018 - February 2019 post sample.
goldforc <- predict(gold.hw, n.ahead = 6)
oilforc <- predict(oil.hw, n.ahead = 6)
yenforc <- predict(yen.hw, n.ahead = 6)
#25 Use ts.plot() function to plot side-by-side post sample prices
#   ("goldpost", "oilpost","yenpost") and their forecasted counterparts.
#   Please designate red color to represent the actual prices,
#   and blue dotted lines to represent forecasted values.

# Three panels in one row. The previous graphics settings are saved and put
# back afterwards so later plots are not squeezed into the same layout.
old_par <- par(mfrow = c(1, 3))
# Line type 2 is "dashed"; the task asks for dotted forecasts, so the line
# types are named explicitly: solid for actual prices, dotted for forecasts.
plot1 <- ts.plot(goldpost, goldforc, lty = c("solid", "dotted"), col = c("red", "blue"), ylim = c(1050, 1320))
plot2 <- ts.plot(oilpost, oilforc, lty = c("solid", "dotted"), col = c("red", "blue"))
plot3 <- ts.plot(yenpost, yenforc, lty = c("solid", "dotted"), col = c("red", "blue"))
par(old_par)

#26 Please calculate forecast mean percentage error for each asset's forecasting model. Which asset's forecasting model has the lowest mean percentage error?

# Mean percentage error = mean of (actual - forecast) / actual, in percent.
# A positive value means the forecasts were on average below the actual prices.
gold_pct <- mean((goldpost - goldforc) / goldpost * 100)
gold_pct
## [1] 5.98464
oil_pct <- mean((oilpost - oilforc) / oilpost * 100)
oil_pct
## [1] -7.141958
yen_pct <- mean((yenpost - yenforc) / yenpost * 100)
yen_pct
## [1] 3.700225
#27 Use gold, oil, and yen variables to estimate HoltWinters model for each asset. Save these estimates as "goldc.hw", "oilc.hw", and "yenc.hw".
# Fits on the complete January 2005 - February 2019 series.
goldc.hw <- HoltWinters(gold, seasonal = "additive")
oilc.hw <- HoltWinters(oil, seasonal = "additive")
yenc.hw <- HoltWinters(yen, seasonal = "additive")
#28 Use "goldc.hw", "oilc.hw", and "yenc.hw" models to create out-of-sample forecasts to predict the prices of each of the assets for the rest of 2019. Save these forecasts as "goldforcos", "oilforcos", "yenforcos". What is the forecasted price of Gold for November 2019?

# The series end in February 2019, so 10 steps ahead covers March through
# December 2019.
# NOTE: the forecasts are stored as *forcoc (not *forcos as the task text
# says); the plots and return calculations further down use these names.
goldforcoc <- predict(goldc.hw, n.ahead = 10)
oilforcoc <- predict(oilc.hw, n.ahead = 10)
yenforcoc <- predict(yenc.hw, n.ahead = 10)

# November 2019 gold forecast. The forecast is a time series whose elements
# are not named by month, so indexing it with "Nov" does not select November
# (it yields NA). Pick the month by its position on the time axis instead.
goldforcoc_10 <- as.numeric(
  window(goldforcoc, start = c(2019, 11), end = c(2019, 11))
)
goldforcoc_10
#29 Create time series plots for each asset that combine the actual price data of each asset and their out-of-sample forecasted values. Please designate red color to represent the actual prices, and blue dotted lines to represent forecasted values. What do you think will happen to the price of each asset by the end of the year?

# Three panels in one row. The previous graphics settings are saved and put
# back afterwards.
old_par <- par(mfrow = c(1, 3))
# Two series per panel, so two line types: solid for actual prices and
# dotted (not dashed) for forecasts, as the task requests.
plot4 <- ts.plot(gold, goldforcoc, lty = c("solid", "dotted"), col = c("red", "blue"))
plot5 <- ts.plot(oil, oilforcoc, lty = c("solid", "dotted"), col = c("red", "blue"))
plot6 <- ts.plot(yen, yenforcoc, lty = c("solid", "dotted"), col = c("red", "blue"))
par(old_par)

# The prices of all assets will drop by the end of the year.
# NOTE(review): the changes computed in step #30 show only gold falling, while
# oil and yen rise; this answer should be reconciled with those numbers.
#30 Please calculate the percentage change between the price of each asset in February 2019 and their forecasted December 2019 prices. Which asset promises the highest rate of return?

# Rate of return = (December forecast - February price) / February price.
# The change is measured against the February price (the starting point);
# dividing by the December forecast instead, as 1 - feb / dec does, misstates
# the return.
# NOTE: gold_dec, oil_dec and yen_dec previously held the decompose() results
# from step #15; from here on they hold the scalar December 2019 forecasts
# (the 10th step ahead).
# The approximate values quoted below are derived algebraically from the
# previously recorded 1 - feb / dec outputs (-0.03984293, 0.05258212,
# 0.0007605987); rerun to confirm the exact figures.

gold_feb <- window(gold, start = c(2019, 2))[[1]]
gold_dec <- goldforcoc[[10]]
gold_perc <- (gold_dec - gold_feb) / gold_feb
gold_perc # approx. -0.0383: EOY price forecast is about 3.8% lower.

oil_feb <- window(oil, start = c(2019, 2))[[1]]
oil_dec <- oilforcoc[[10]]
oil_perc <- (oil_dec - oil_feb) / oil_feb
oil_perc # approx. 0.0555: EOY price forecast is about 5.6% higher.

yen_feb <- window(yen, start = c(2019, 2))[[1]]
yen_dec <- yenforcoc[[10]]
yen_perc <- (yen_dec - yen_feb) / yen_feb
yen_perc # approx. 0.00076: EOY price forecast is about 0.08% higher.

# On these figures oil promises the highest rate of return.