# Set working directory not to repeat the path to the folder while loading data
setwd("~/Desktop/2nd Year 1st Sem/Time Series/Assignment 1")
return <- read.csv("assignment1Data2023.csv")
head(return)
## X x
## 1 1 150.8031
## 2 2 152.2122
## 3 3 151.4941
## 4 4 150.6815
## 5 5 151.0155
## 6 6 151.0590
colnames(return) <- c("Day","Value")
head(return)
## Day Value
## 1 1 150.8031
## 2 2 152.2122
## 3 3 151.4941
## 4 4 150.6815
## 5 5 151.0155
## 6 6 151.0590
class(return)
## [1] "data.frame"
# Convert to the TS object
returnts <- ts(as.vector(return$Value), start=1, end = 127)
returnts
## Time Series:
## Start = 1
## End = 127
## Frequency = 1
## [1] 150.80305807 152.21215799 151.49405248 150.68153801 151.01548379
## [6] 151.05896873 152.24529300 154.32836585 154.99211320 151.47319862
## [11] 148.65887120 149.42475661 149.76723188 152.79419244 156.32484804
## [16] 155.47711985 148.64751873 144.28974550 145.15092082 145.12823085
## [21] 150.98001094 155.18967939 152.31702307 141.41912041 135.88269351
## [26] 137.35092521 136.38396383 145.79343214 151.98285080 146.45194985
## [31] 130.45934365 122.22024827 123.96194926 123.76340190 137.00970421
## [36] 143.94322947 135.92688865 117.26486197 108.57562583 109.74256049
## [41] 110.60954367 129.01589743 135.49843374 124.31976202 104.65195279
## [46] 95.26900949 94.29487663 94.37477787 117.53027695 123.77761512
## [51] 109.88510644 88.11615162 78.25287150 76.08756434 75.57225471
## [56] 102.50237665 108.08497244 92.67132160 69.40893637 58.84454029
## [61] 56.13468662 56.91251923 87.40934899 93.06782601 76.51780460
## [66] 52.35868054 41.32878155 38.34250451 40.69185825 74.43156392
## [71] 81.05790547 63.98375651 38.45748121 27.31432454 24.78323409
## [76] 29.52859185 64.81749992 71.96911598 54.21016437 26.14338285
## [81] 14.02972048 10.95491723 17.52416632 52.46378082 59.97072454
## [86] 43.17610325 12.84834639 0.01752939 -4.03357918 3.83155944
## [91] 38.65303227 47.93226560 32.04728507 0.78978042 -12.15661705
## [96] -16.54803490 -6.29142400 28.71521115 39.03123618 22.97984606
## [101] -10.09253006 -23.88937164 -28.87604996 -15.81700422 21.48876386
## [106] 33.26822703 17.48928106 -16.72770955 -32.43157779 -38.77786096
## [111] -21.00997026 18.81868817 32.96428210 16.06261533 -19.34561245
## [116] -36.37031978 -44.01729753 -20.96522767 21.14554925 37.57495177
## [121] 19.41408315 -16.29602394 -35.55475699 -44.62194939 -17.30734816
## [126] 25.25656020 43.71448310
summary(returnts)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -44.62 21.32 71.97 71.35 135.90 156.32
#Time Series Plot
plot(returnts,
type = "o",
xlab = "Time",
ylab = "Investment Return (in AUD 100,000)",
main = " Time series plot of share market trader's investment return (127 days)",
cex.main = 0.9
)
#Finding correlation from lags
y = returnts
head(y)
## [1] 150.8031 152.2122 151.4941 150.6815 151.0155 151.0590
# Generate first lag of investment return series
x = zlag(returnts)
head(x)
## [1] NA 150.8031 152.2122 151.4941 150.6815 151.0155
#Create an index to get rid of first NA value in x
index = 2:length(x)
#Calculate correlation between numerical values in x and y
cor(y[index],x[index])
## [1] 0.9635593
plot(y[index],x[index],
ylab = "Investment Return Series",
xlab = "First lag of Investment Return series",
main = "Scatter plot of Investment Return Series and its first lag",
cex.main = 0.9)
#Finding frequency from ACF Plot
acf(returnts,lag.max = 50, main="ACF Plot of Investment Return Series Over 50 lag")
From the Figure, we can see that the pattern is repeating every 6 or 7 lags. By trying out both of those frequencies, frequency of 7 has better statistics and diagnosis than frequency of 6. Thus, we will choose frequency of 7 for seasonal and cosine model.
We will create new time series object seasonal_returnts with frequency of 7 and its’ time series plot as below and the plot is shown in above Figure.
#Creating new time series with frequency of 7 for seasonal and cosine trend model
seasonal_retrunts <- ts(data = as.vector(return$Value), start = 1, frequency = 7)
seasonal_retrunts
## Time Series:
## Start = c(1, 1)
## End = c(19, 1)
## Frequency = 7
## [1] 150.80305807 152.21215799 151.49405248 150.68153801 151.01548379
## [6] 151.05896873 152.24529300 154.32836585 154.99211320 151.47319862
## [11] 148.65887120 149.42475661 149.76723188 152.79419244 156.32484804
## [16] 155.47711985 148.64751873 144.28974550 145.15092082 145.12823085
## [21] 150.98001094 155.18967939 152.31702307 141.41912041 135.88269351
## [26] 137.35092521 136.38396383 145.79343214 151.98285080 146.45194985
## [31] 130.45934365 122.22024827 123.96194926 123.76340190 137.00970421
## [36] 143.94322947 135.92688865 117.26486197 108.57562583 109.74256049
## [41] 110.60954367 129.01589743 135.49843374 124.31976202 104.65195279
## [46] 95.26900949 94.29487663 94.37477787 117.53027695 123.77761512
## [51] 109.88510644 88.11615162 78.25287150 76.08756434 75.57225471
## [56] 102.50237665 108.08497244 92.67132160 69.40893637 58.84454029
## [61] 56.13468662 56.91251923 87.40934899 93.06782601 76.51780460
## [66] 52.35868054 41.32878155 38.34250451 40.69185825 74.43156392
## [71] 81.05790547 63.98375651 38.45748121 27.31432454 24.78323409
## [76] 29.52859185 64.81749992 71.96911598 54.21016437 26.14338285
## [81] 14.02972048 10.95491723 17.52416632 52.46378082 59.97072454
## [86] 43.17610325 12.84834639 0.01752939 -4.03357918 3.83155944
## [91] 38.65303227 47.93226560 32.04728507 0.78978042 -12.15661705
## [96] -16.54803490 -6.29142400 28.71521115 39.03123618 22.97984606
## [101] -10.09253006 -23.88937164 -28.87604996 -15.81700422 21.48876386
## [106] 33.26822703 17.48928106 -16.72770955 -32.43157779 -38.77786096
## [111] -21.00997026 18.81868817 32.96428210 16.06261533 -19.34561245
## [116] -36.37031978 -44.01729753 -20.96522767 21.14554925 37.57495177
## [121] 19.41408315 -16.29602394 -35.55475699 -44.62194939 -17.30734816
## [126] 25.25656020 43.71448310
#Time Series Plot
plot(seasonal_retrunts,
type = "o",
xlab = "Time",
ylab = "Investment Return (in AUD 100,000)",
main = " Time series plot of share market trader's investment return (127 days)",
cex.main = 0.9)
#First model : Linear Trend model in time
#Time Series Plot
t <- time(returnts)
t
## Time Series:
## Start = 1
## End = 127
## Frequency = 1
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127
model1 <- lm(returnts ~ t) #Label the model
plot(returnts,
ylab = "Investment Return (in AUD 100,000)",
xlab='Time',
type='o',
main = "Linear trend line to share market trader's investment return series",
cex.main = 0.9)
abline(model1)
summary(model1)
##
## Call:
## lm(formula = returnts ~ t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.053 -17.959 2.056 15.095 72.799
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 173.38533 3.85513 44.98 <2e-16 ***
## t -1.59425 0.05227 -30.50 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.59 on 125 degrees of freedom
## Multiple R-squared: 0.8816, Adjusted R-squared: 0.8806
## F-statistic: 930.3 on 1 and 125 DF, p-value: < 2.2e-16
#Second model : Quadratic Trend model in time
t = time(returnts) # Get time points
t2 = t^2 # Create t^2
#Label the model
model2 = lm(returnts ~ t + t2)
plot(
ts(fitted(model2)),
ylab = "Investment Return (in AUD 100,000)",
xlab = "Time",
main = "Fitted quadratic curve to share market trader's investment return series",
cex.main = 0.9,
ylim = c(min(c(fitted(model2), as.vector(returnts))) ,
max(c(fitted(model2), as.vector(returnts)))
)
)
lines(as.vector(returnts),type="o")
summary(model2)
##
## Call:
## lm(formula = returnts ~ t + t2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.345 -18.387 0.941 15.584 68.478
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 177.915494 5.838752 30.471 < 2e-16 ***
## t -1.804958 0.210586 -8.571 3.43e-14 ***
## t2 0.001646 0.001594 1.033 0.304
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.59 on 124 degrees of freedom
## Multiple R-squared: 0.8826, Adjusted R-squared: 0.8807
## F-statistic: 465.9 on 2 and 124 DF, p-value: < 2.2e-16
#Third Model : Seasonal Trend Model in Time
pattern=season(seasonal_retrunts)
model3=lm(seasonal_retrunts ~ pattern -1) # -1 removes the intercept term
names(model3$coefficients) <- c('Pattern 1', 'Pattern 2', 'Pattern 3', 'Pattern 4', 'Pattern 5', 'Pattern 6', 'Pattern 7')
plot(
ts(fitted(model3)),
ylab = "Investment Return (in AUD 100,000)",
xlab = "Time",
main = "Fitted Seasonal model to share market trader's investment return series",
cex.main = 0.9,
ylim = c(min(c(fitted(model3), as.vector(seasonal_retrunts))),
max(c(fitted(model3), as.vector(seasonal_retrunts)))),
col = "green"
)
lines(as.vector(seasonal_retrunts),type="o")
summary(model3)
##
## Call:
## lm(formula = seasonal_retrunts ~ pattern - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -96.865 -56.530 -2.747 58.007 98.773
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Pattern 1 95.81 14.17 6.761 5.27e-10 ***
## Pattern 2 87.23 14.56 5.991 2.24e-08 ***
## Pattern 3 65.06 14.56 4.469 1.80e-05 ***
## Pattern 4 54.72 14.56 3.758 0.000266 ***
## Pattern 5 52.24 14.56 3.588 0.000483 ***
## Pattern 6 58.54 14.56 4.021 0.000102 ***
## Pattern 7 84.50 14.56 5.804 5.39e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 61.77 on 120 degrees of freedom
## Multiple R-squared: 0.5979, Adjusted R-squared: 0.5744
## F-statistic: 25.49 on 7 and 120 DF, p-value: < 2.2e-16
#Fouth model : Cosine Trend model in time
har <- harmonic(seasonal_retrunts, 1)
#Label the model
model4 <- lm(seasonal_retrunts ~ har)
plot(
ts(fitted(model4)),
ylab = "Investment Return (in AUD 100,000)",
xlab = "Time",
main = "Fitted cosine wave to share market trader's investment return series",
ylim = c(min(c(fitted(model4), as.vector(seasonal_retrunts))),
max(c(fitted(model4), as.vector(seasonal_retrunts)))),
col = "green"
)
lines(as.vector(seasonal_retrunts),type="o")
summary(model4)
##
## Call:
## lm(formula = seasonal_retrunts ~ har)
##
## Residuals:
## Min 1Q Median 3Q Max
## -94.243 -56.084 -6.569 59.584 101.395
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.175 5.399 13.184 < 2e-16 ***
## harcos(2*pi*t) 22.608 7.605 2.973 0.00355 **
## harsin(2*pi*t) 2.731 7.665 0.356 0.72217
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60.84 on 124 degrees of freedom
## Multiple R-squared: 0.06742, Adjusted R-squared: 0.05238
## F-statistic: 4.482 on 2 and 124 DF, p-value: 0.0132
##
## Shapiro-Wilk normality test
##
## data: res.model1
## W = 0.97322, p-value = 0.01269
##
## Shapiro-Wilk normality test
##
## data: res.model2
## W = 0.97472, p-value = 0.01764
##
## Shapiro-Wilk normality test
##
## data: res.model3
## W = 0.92612, p-value = 3.159e-06
##
## Shapiro-Wilk normality test
##
## data: res.model4
## W = 0.92615, p-value = 3.17e-06
#Forecasting with best fit model
t = c(128:142)
t2 = t^2
future <- data.frame(t, t2)
forecast <- predict(model2 , future , interval = "prediction")
print(forecast)
## fit lwr upr
## 1 -26.14887 -70.41401 18.1162589
## 2 -27.53078 -71.89306 16.8315133
## 3 -28.90938 -73.37393 15.5551602
## 4 -30.28470 -74.85674 14.2873401
## 5 -31.65673 -76.34165 13.0281928
## 6 -33.02546 -77.82877 11.7778576
## 7 -34.39090 -79.31827 10.5364729
## 8 -35.75305 -80.81027 9.3041762
## 9 -37.11190 -82.30491 8.0811037
## 10 -38.46746 -83.80232 6.8673905
## 11 -39.81974 -85.30264 5.6631702
## 12 -41.16871 -86.80600 4.4685747
## 13 -42.51440 -88.31253 3.2837342
## 14 -43.85679 -89.82237 2.1087771
## 15 -45.19590 -91.33562 0.9438294
plot(returnts,
type='o',
xlim =c(1,142),
ylim=c(-90,150),
ylab = "Investment Return (in AUD 100,000)",
xlab = "Time",
main="Forecasts using Quadratic Trend Model Fitted to Investment Return Series",
cex.main=0.9)
lines(ts(as.vector(forecast[,1]), start = 128), col="red", type="l")
lines(ts(as.vector(forecast[,2]), start = 128), col="blue", type="l")
lines(ts(as.vector(forecast[,3]), start = 128), col="blue", type="l")
legend("bottomleft",
lty=1,
pch=1, col=c("black","blue","red"),
text.width = 20, c("Data","5% forecast limits", "Forecasts"))
#Recommend Solution
#Combination of Cubic Trend Model and Seasonal Model
t <- time(returnts)
t2 <- t^2
model5=lm(seasonal_retrunts ~ pattern + t + t2 - 1 )
summary(model5)
##
## Call:
## lm(formula = seasonal_retrunts ~ pattern + t + t2 - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -48.649 -9.099 -0.641 9.216 45.273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## patternMonday 201.206050 4.820589 41.74 <2e-16 ***
## patternTuesday 188.813995 4.932101 38.28 <2e-16 ***
## patternWednesday 168.244133 4.949564 33.99 <2e-16 ***
## patternThursday 159.502619 4.965967 32.12 <2e-16 ***
## patternFriday 158.620533 4.981318 31.84 <2e-16 ***
## patternSaturday 166.512593 4.995628 33.33 <2e-16 ***
## patternSunday 194.064846 5.008908 38.74 <2e-16 ***
## t -1.755723 0.139203 -12.61 <2e-16 ***
## t2 0.001253 0.001054 1.19 0.237
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.26 on 118 degrees of freedom
## Multiple R-squared: 0.9789, Adjusted R-squared: 0.9773
## F-statistic: 609.4 on 9 and 118 DF, p-value: < 2.2e-16
x11()
plot(
ts(fitted(model5)),
ylab='y',
main = "Fitted seaonal plus quadratic curve to investment return series",
ylim = c(min(c(fitted(model5), as.vector(seasonal_retrunts))),
max(c(fitted(model5), as.vector(seasonal_retrunts)))),
col="red"
)
lines(as.vector(seasonal_retrunts),type="o")
summary(model5)
##
## Call:
## lm(formula = seasonal_retrunts ~ pattern + t + t2 - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -48.649 -9.099 -0.641 9.216 45.273
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## patternMonday 201.206050 4.820589 41.74 <2e-16 ***
## patternTuesday 188.813995 4.932101 38.28 <2e-16 ***
## patternWednesday 168.244133 4.949564 33.99 <2e-16 ***
## patternThursday 159.502619 4.965967 32.12 <2e-16 ***
## patternFriday 158.620533 4.981318 31.84 <2e-16 ***
## patternSaturday 166.512593 4.995628 33.33 <2e-16 ***
## patternSunday 194.064846 5.008908 38.74 <2e-16 ***
## t -1.755723 0.139203 -12.61 <2e-16 ***
## t2 0.001253 0.001054 1.19 0.237
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.26 on 118 degrees of freedom
## Multiple R-squared: 0.9789, Adjusted R-squared: 0.9773
## F-statistic: 609.4 on 9 and 118 DF, p-value: < 2.2e-16
# Residual analysis
par(mfrow=c(2,3))
res.model5 = rstudent(model5)
plot(y = res.model5,
x = as.vector(time(seasonal_retrunts)),
xlab = 'Time',
ylab='Standardized Residuals',
type='o',
main = "Standardised residuals from Seasonal plus Quadratic Trend model")
hist(res.model5,
xlab='Standardized Residuals',
main = "Histogram of standardised residuals")
qqnorm(y=res.model5,
main = "QQ plot of standardised residuals")
qqline(y=res.model5,
col = 2,
lwd = 1,
lty = 2)
shapiro.test(res.model5)
##
## Shapiro-Wilk normality test
##
## data: res.model5
## W = 0.97937, p-value = 0.0496
acf(res.model5, main = "ACF of standardized residuals.")
pacf(res.model5, main = "PACF of standardized residuals.")