# Load the required libraries
library(tidyverse) # for map
library(quantmod)
library(PerformanceAnalytics)
# Define the ticker symbol for the Taiwan Stock Exchange Value Weighted Index (TWSI)
ticker_symbol <- "^TWII" # TWSI's ticker symbol
# Download the data from Yahoo Finance
TWII.adj <-getSymbols(ticker_symbol, src = 'yahoo',
from = "2013-10-01",
to = "2023-09-28",
auto.assign = TRUE,
warnings = FALSE) %>% map(~Ad(get(.))) %>% na.omit()
# View the head of the data
head(TWII.adj)
## [[1]]
## TWII.Adjusted
## 2013-10-01 8186.989
## 2013-10-02 8216.487
## 2013-10-03 8358.987
## 2013-10-04 8364.518
## 2013-10-07 8333.628
## 2013-10-08 8375.618
## 2013-10-09 8344.698
## 2013-10-11 8349.338
## 2013-10-14 8273.928
## 2013-10-15 8367.848
## ...
## 2023-09-14 16807.561
## 2023-09-15 16920.920
## 2023-09-18 16698.240
## 2023-09-19 16636.320
## 2023-09-20 16534.750
## 2023-09-21 16316.670
## 2023-09-22 16344.480
## 2023-09-25 16452.230
## 2023-09-26 16276.070
## 2023-09-27 16310.360
# prices now is a list object that contains xts object,
# according to the environment, so we take it out
class(TWII.adj)
## [1] "list"
TWII.adj<-TWII.adj[[1]]
class(TWII.adj)
## [1] "xts" "zoo"
# now we can comfortably plot it
plot(TWII.adj)
I first compare the difference between Simple Returms and
Log Returns, both are calculated using
dailyReturn.
Then, I test to see if there is any difference of the results of
Simple Returns using dailyReturn and
periodReturn.
Lastly, I test to see if there is any difference of the results
of Log Returns using dailyReturn and
periodReturn.
Results:
dailyReturns and
periodReturns(period argument set to
'daily')dailyReturn function, we have
already verified from Comparison 2 and 3 that there is
no difference between using dailyReturn function and
periodReturn as long as the period argument is
set to 'daily'.#Simple Returns using dailyReturn
TWII.adj.SRet_1 <- quantmod::dailyReturn(TWII.adj, leading=F) %>% na.omit()
#Log Returns using dailyReturn
TWII.adj.LRet_1 <- quantmod::dailyReturn(TWII.adj,type= "log", leading=F) %>% na.omit()
dif_twii_adj <- list()
dif_twii_adj <- TWII.adj.SRet_1 - TWII.adj.LRet_1
summary(dif_twii_adj)
## Index daily.returns
## Min. :2013-10-02 Min. :0.000e+00
## 1st Qu.:2016-03-28 1st Qu.:2.338e-06
## Median :2018-09-23 Median :1.242e-05
## Mean :2018-09-27 Mean :4.541e-05
## 3rd Qu.:2021-03-31 3rd Qu.:4.223e-05
## Max. :2023-09-27 Max. :2.080e-03
#Simple Returns using dailyReturn
TWII.adj.SRet_1 <- quantmod::dailyReturn(TWII.adj, leading=F) %>% na.omit()
#Simple Returns using periodReturn
TWII.adj.SRet_2 <- quantmod::periodReturn(TWII.adj, period= 'daily',type= "arithmetic", leading=F) %>% na.omit()
dif_twii_adj <- list()
dif_twii_adj <- TWII.adj.SRet_1 - TWII.adj.SRet_2
summary(dif_twii_adj)
## Index daily.returns
## Min. :2013-10-02 Min. :0
## 1st Qu.:2016-03-28 1st Qu.:0
## Median :2018-09-23 Median :0
## Mean :2018-09-27 Mean :0
## 3rd Qu.:2021-03-31 3rd Qu.:0
## Max. :2023-09-27 Max. :0
#Log Returns using dailyReturn
TWII.adj.LRet_1 <- quantmod::dailyReturn(TWII.adj, type = "log", leading=F) %>% na.omit()
#Log Returns using periodReturn
TWII.adj.LRet_2 <- quantmod::periodReturn(TWII.adj, period= 'daily',type= "log", leading=F) %>% na.omit()
dif_twii_adj <- list()
dif_twii_adj <- TWII.adj.LRet_1 - TWII.adj.LRet_2
summary(dif_twii_adj)
## Index daily.returns
## Min. :2013-10-02 Min. :0
## 1st Qu.:2016-03-28 1st Qu.:0
## Median :2018-09-23 Median :0
## Mean :2018-09-27 Mean :0
## 3rd Qu.:2021-03-31 3rd Qu.:0
## Max. :2023-09-27 Max. :0
We know that the mathematical notation of simple returns and log returns are:
\[ R_{t}^{simple}=\frac{P_t}{P_{t-1}} \]
\[ r_t^{log}=log(\frac{P_t}{P_{t-1}})=log(R_t^{simple}) \]
The two variables are not independent of each other, as the log return is just the log of the simple return. Hence, we have to assume that the population of the two variables are paired.
t.test(TWII.adj.SRet_1[,1], TWII.adj.LRet_1[,1], alternative = "two.sided", paired = "TRUE")
##
## Paired t-test
##
## data: TWII.adj.SRet_1[, 1] and TWII.adj.LRet_1[, 1]
## t = 18.57, df = 2435, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 4.061244e-05 5.020220e-05
## sample estimates:
## mean difference
## 4.540732e-05
As you can see, if we don’t assume that the two variables are paired, we dervive a different result, as the statistic and the degree of freedom is completely different from each other.
t.test(TWII.adj.SRet_1[,1], TWII.adj.LRet_1[,1], alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: TWII.adj.SRet_1[, 1] and TWII.adj.LRet_1[, 1]
## t = 0.16639, df = 4870, p-value = 0.8679
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0004895777 0.0005803924
## sample estimates:
## mean of x mean of y
## 0.0003283524 0.0002829451
We are going to inspect the differences between:
Simple and Log Returns
The result of monthly returns calculated by the function
monthlyReturn and periodReturn(set the
period parameter to "monthly")
The result of weekly returns calculated by the function
weeklyReturn and periodReturn(set the
period parameter to "weekly")
Results:
As we can see from the summary results of Comparison 2
and 3, there isn’t any difference in using
weeklyReturns and
periodReturns(period argument set to
'weekly') and in using monthlyReturns and
periodReturns(period argument set to
'monthly')
From Comparison 1, we can see that there is a
difference between simple and log returns. Although I have only used
data calculated by the dailyReturn function, we have
already verified from Comparison 2 and
3 that there is no difference between using
dailyReturn function and periodReturn as long
as the period argument is set to
'daily'.
I have created log and simple returns in both weekly and monthly
frequency using periodReturn, weeklyReturn and
monthlyReturn, which leads to creating 8 price data.
#log monthly using monthlyReturn
TWII.adj.LRet_monthly_m <- quantmod::monthlyReturn(TWII.adj, type= "log", leading=F) %>% na.omit()
#log monthly using periodReturn
TWII.adj.LRet_monthly_p <- quantmod::periodReturn(TWII.adj, period="monthly", type= "log", leading=F) %>% na.omit()
#log weekly using weeklyReturn
TWII.adj.LRet_weekly_w <- quantmod::weeklyReturn(TWII.adj, type= "log", leading=F) %>% na.omit()
#log weekly using periodReturn
TWII.adj.LRet_weekly_p <- quantmod::periodReturn(TWII.adj, period="weekly", type= "log", leading=F) %>% na.omit()
#simple monthly using monthlyReturn
TWII.adj.SRet_monthly_m <- quantmod::monthlyReturn(TWII.adj, type= 'arithmetic', leading=F) %>% na.omit()
#simple monthly using periodReturn
TWII.adj.SRet_monthly_p <- quantmod::periodReturn(TWII.adj, type= 'arithmetic', leading=F) %>% na.omit()
#simple weekly using monthlyReturn
TWII.adj.SRet_weekly_w <- quantmod::weeklyReturn(TWII.adj, type= 'arithmetic', leading=F) %>% na.omit()
#simple weekly using monthlyReturn
TWII.adj.SRet_weekly_p <- quantmod::periodReturn(TWII.adj, type= 'arithmetic', leading=F) %>% na.omit()
# monthly difference test
dif_twii_monthly_m <- list()
dif_twii_monthly_m <- TWII.adj.SRet_monthly_m - TWII.adj.LRet_monthly_m
summary(dif_twii_monthly_m)
## Index monthly.returns
## Min. :2013-11-29 Min. :1.690e-07
## 1st Qu.:2016-05-15 1st Qu.:6.191e-05
## Median :2018-10-31 Median :2.633e-04
## Mean :2018-10-29 Mean :9.123e-04
## 3rd Qu.:2021-04-14 3rd Qu.:8.018e-04
## Max. :2023-09-27 Max. :1.087e-02
t.test(TWII.adj.SRet_monthly_m[,1], TWII.adj.LRet_monthly_m[,1], alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: TWII.adj.SRet_monthly_m[, 1] and TWII.adj.LRet_monthly_m[, 1]
## t = 0.16562, df = 236, p-value = 0.8686
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.009940117 0.011764757
## sample estimates:
## mean of x mean of y
## 0.006438629 0.005526310
# weekly difference test
dif_twii_weekly_w <- list()
dif_twii_weekly_w <- TWII.adj.SRet_weekly_w - TWII.adj.LRet_weekly_w
summary(dif_twii_weekly_w)
## Index weekly.returns
## Min. :2013-10-11 Min. :0.000e+00
## 1st Qu.:2016-04-11 1st Qu.:1.820e-05
## Median :2018-09-28 Median :6.949e-05
## Mean :2018-10-01 Mean :2.258e-04
## 3rd Qu.:2021-03-29 3rd Qu.:1.951e-04
## Max. :2023-09-27 Max. :5.975e-03
t.test(TWII.adj.SRet_weekly_w[,1], TWII.adj.LRet_weekly_w[,1], alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: TWII.adj.SRet_weekly_w[, 1] and TWII.adj.LRet_weekly_w[, 1]
## t = 0.17088, df = 1028, p-value = 0.8644
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.002367545 0.002819226
## sample estimates:
## mean of x mean of y
## 0.001522543 0.001296703
# monthly difference test - Simple Returns
dif_twii_monthly <- list()
dif_twii_monthly <- TWII.adj.SRet_monthly_m - TWII.adj.SRet_monthly_p
summary(dif_twii_monthly)
## Index monthly.returns
## Min. :2013-11-29 Min. :0
## 1st Qu.:2016-05-15 1st Qu.:0
## Median :2018-10-31 Median :0
## Mean :2018-10-29 Mean :0
## 3rd Qu.:2021-04-14 3rd Qu.:0
## Max. :2023-09-27 Max. :0
t.test(TWII.adj.SRet_monthly_m[,1], TWII.adj.SRet_monthly_p[,1], alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: TWII.adj.SRet_monthly_m[, 1] and TWII.adj.SRet_monthly_p[, 1]
## t = 0, df = 236, p-value = 1
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01083383 0.01083383
## sample estimates:
## mean of x mean of y
## 0.006438629 0.006438629
# monthly difference test - Log Returns
dif_twii_monthly_L <- list()
dif_twii_monthly_L <- TWII.adj.LRet_weekly_w - TWII.adj.LRet_weekly_p
summary(dif_twii_monthly_L)
## Index weekly.returns
## Min. :2013-10-11 Min. :0
## 1st Qu.:2016-04-11 1st Qu.:0
## Median :2018-09-28 Median :0
## Mean :2018-10-01 Mean :0
## 3rd Qu.:2021-03-29 3rd Qu.:0
## Max. :2023-09-27 Max. :0
t.test(TWII.adj.LRet_monthly_m[,1], TWII.adj.LRet_monthly_p[,1], alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: TWII.adj.LRet_monthly_m[, 1] and TWII.adj.LRet_monthly_p[, 1]
## t = 0, df = 236, p-value = 1
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01087101 0.01087101
## sample estimates:
## mean of x mean of y
## 0.00552631 0.00552631
# weekly difference test - Simple Returns
dif_twii_weekly <- list()
dif_twii_weekly <- TWII.adj.SRet_weekly_w - TWII.adj.SRet_weekly_p
summary(dif_twii_monthly)
## Index monthly.returns
## Min. :2013-11-29 Min. :0
## 1st Qu.:2016-05-15 1st Qu.:0
## Median :2018-10-31 Median :0
## Mean :2018-10-29 Mean :0
## 3rd Qu.:2021-04-14 3rd Qu.:0
## Max. :2023-09-27 Max. :0
t.test(TWII.adj.SRet_weekly_w[,1], TWII.adj.SRet_weekly_p[,1], alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: TWII.adj.SRet_weekly_w[, 1] and TWII.adj.SRet_weekly_p[, 1]
## t = -1.2295, df = 131.83, p-value = 0.2211
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.012825638 0.002993466
## sample estimates:
## mean of x mean of y
## 0.001522543 0.006438629
# weekly difference test - Log Returns
dif_twii_weekly_L <- list()
dif_twii_weekly_L <- TWII.adj.LRet_weekly_w - TWII.adj.LRet_weekly_p
summary(dif_twii_weekly_L)
## Index weekly.returns
## Min. :2013-10-11 Min. :0
## 1st Qu.:2016-04-11 1st Qu.:0
## Median :2018-09-28 Median :0
## Mean :2018-10-01 Mean :0
## 3rd Qu.:2021-03-29 3rd Qu.:0
## Max. :2023-09-27 Max. :0
t.test(TWII.adj.LRet_weekly_w[,1], TWII.adj.LRet_weekly_p[,1], alternative = "two.sided")
##
## Welch Two Sample t-test
##
## data: TWII.adj.LRet_weekly_w[, 1] and TWII.adj.LRet_weekly_p[, 1]
## t = 0, df = 1028, p-value = 1
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.002602187 0.002602187
## sample estimates:
## mean of x mean of y
## 0.001296703 0.001296703
plot(TWII.adj.LRet_1)
#5.b
logR.min<-min(na.omit(TWII.adj.LRet_1))
logR.max<-max(TWII.adj.LRet_1%>%na.omit())
logR.mean<-mean(TWII.adj.LRet_1%>%na.omit())
logR.sd<-sd(TWII.adj.LRet_1%>%na.omit())
logR<-na.omit(TWII.adj.LRet_1)
logR<-coredata(logR)%>%as.numeric()
max(density(logR)$y)
## [1] 56.48458
max(dnorm(logR,mean=logR.mean, sd=logR.sd))
## [1] 41.84229
plot(density(logR), type="l", xlab="logR", ylab="Density",main="日報酬率機率密度函數與標準常態分配之比較")
lines(sort(logR), dnorm(sort(logR), mean=logR.mean, sd=logR.sd), lty=2, col=2)
abline(v=logR.mean, lty=3, col=3)
text(-0.04,30, expression(f(x) == frac(1, sqrt(2*pi*sigma^2)) * e^{-frac((x - mu)^2, 2*sigma^2)} ), adj = 0.5)
legend(.025, 55, lty=1:3, col=1:3, c("日報酬率機率密度","標準常態分配機率密度","平均日報酬率"), cex = 0.8)
Although I have finished the question, I have doubts for the assumptions we make based on the hypothesis test we’re told to execute:
Since the returns are from the price data of the same stock, wouldn’t that mean that the population of the positive returns and negative returns are the same? then how can the two variables independent?
Does the return time series have serial correlation? If so, then can they be independent and identically distributed?
If the population isn’t even normally distributed, the test statistic will not follow an F distribution. Should we just assume that the population follows a normal distribution?
The violations of these assumptions will affect the distribution of the test statistic, which will lead to incorrect estimations of the p-value. Hence, we can’t really trust the results of the test.
#5.c
fl <- logR[logR<0]
pt <- logR[logR>0]
var.test(pt, fl, alternative = "less")
##
## F test to compare two variances
##
## data: pt and fl
## F = 0.63656, num df = 1320, denom df = 1112, p-value = 1.865e-15
## alternative hypothesis: true ratio of variances is less than 1
## 95 percent confidence interval:
## 0.0000000 0.6996504
## sample estimates:
## ratio of variances
## 0.6365608
I have also noticed that there are two returns that equals to zero.
logR[logR==0]
## [1] 0 0