Describe Dateset

Sectors selected: Information Technology, Real Estate, Health Care
Stocks: CRM (Salesforce), NVDA (NVIDIA), AMT (American Tower Corp), PSA (Public Storage), BHC (Bausch Health Companies), PFE (Pfizer Inc)
Time Period: 10/01/2013 to 11/14/2021

Visualization

readStockCSV = function(code) {
  result = read.csv(paste('./resources/', code, '.csv', sep=''))
  if ("Adj.Close" %in% names(result)) {
    names(result)[names(result) == 'Adj.Close'] = "AdjClose"
  }
  result$Date = as.Date(result$Date, format='%Y-%m-%d')
  result$Name = code
  result
}

filter_df = function(df, name, start_date, end_date) {
  result = df
  if (!missing(name)) {
    result = df[df$Name == name,]
  }
  if (missing(start_date)) {
    start_date = min(result$Date)
  }
  if (missing(end_date)) {
    end_date = max(result$Date)
  }
  result = result[result$Date >= as.Date(start_date) & result$Date <= as.Date(end_date),]
  result = result[order(result$Date),]
  result
}

calculateReturns = function(df) {
  result = NULL
  for (name in unique(df$Name)) {
    curDf = df[df$Name == name,]
    starting = curDf[curDf$Date == min(curDf$Date),]$Open
    curDf$Return = curDf$AdjClose / starting
    if (is.null(result)) {
      result = curDf
    } else {
      result = rbind(result, curDf)
    }
  }
  result = result[order(df$Name),]
  result
}

calculateReturnsBYSector = function(df) {
  result = NULL
  for (sector in unique(df$Sector)) {
    sectorDf = df[df$Sector == sector,]
    for (date in unique(df$Date)) {
      curDateDf = sectorDf[sectorDf$Date == date,]
      if (length(curDateDf$Date) != 2) {
        print(curDateDf)
      }
      assert(length(curDateDf$Date) == 2) # Two securities per sector
      sectorReturn = sum(curDateDf$Return) / 2
      curResDf = data.frame(Date=curDateDf$Date[1], Sector=sector, SectorReturn=sectorReturn)
      if (is.null(result)) {
        result = curResDf
      } else {
        result = rbind(result, curResDf)
      }
    }
  }
  result
}
crm = readStockCSV('CRM')
crm$Sector = 'IT'
nvda  = readStockCSV('NVDA')
nvda$Sector = 'IT'
amt = readStockCSV('AMT')
amt$Sector = 'RealEstate'
psa = readStockCSV('PSA')
psa$Sector = 'RealEstate'
bhc = readStockCSV('BHC')
bhc$Sector = 'HealthCare'
pfe = readStockCSV('PFE')
pfe$Sector = 'HealthCare'
all = rbind(crm, nvda, amt, psa, bhc, pfe)
all = calculateReturns(all)
all_by_sector = calculateReturnsBYSector(all)

1. BHC Bollinger Bands (2015 Sep to Nov)

ggplot(filter_df(all, 'BHC', '2015-09-01', '2015-11-01'), aes(x=Date, y=AdjClose)) + geom_candlestick(aes(open=Open, high=High, low=Low, close=Close)) + ggtitle("BHC Bollinger Bands (2015 Sep to Nov)")

2. NVDA Bollinger Bands (2021 Sep to Nov)

ggplot(filter_df(all, 'NVDA', '2021-09-01', '2021-11-01'), aes(x=Date, y=AdjClose)) + geom_candlestick(aes(open=Open, high=High, low=Low, close=Close)) + ggtitle("NVDA Bollinger Bands (2021 Sep to Nov)")

3. Total Return (2013 Oct to 2021 Nov)

ggplot(all, aes(x=Date, y=Return, colour=factor(Name))) + geom_line() + ggtitle("Total Return")

4. Total Return without NVDA (2013 Oct to 2021 Nov)

ggplot(all[all$Name != 'NVDA',], aes(x=Date, y=Return, colour=factor(Name))) + geom_line() + ggtitle("Total Return (Without NVDA)")

5. Total Return by Sector (2013 Oct to 2021 Nov)

ggplot(all_by_sector, aes(x=Date, y=SectorReturn, colour=factor(Sector))) + geom_line() + ggtitle("Total Return (by Sector)")

6. Total Return by Sector without IT (2013 Oct to 2021 Nov)

ggplot(all_by_sector[all_by_sector$Sector != 'IT',], aes(x=Date, y=SectorReturn, colour=factor(Sector))) + geom_line() + ggtitle("Total Return (by Sector without IT)")

Equal Weighted vs. Equal Weighted

avg_days_per_month = round(length(unique(all$Date)) / (7 * 12 + 1.5)) # Number of months between 2013/10/01 to 2022/11/12
print("Average trading days per month:")
## [1] "Average trading days per month:"
print(avg_days_per_month)
## [1] 24
rebalanceDates = function(df, frequency_in_day) {
  dates = as.Date(ordered(unique(df$Date)))
  result = dates[seq(1, length(dates), frequency_in_day)]
  if (result[length(result)] != dates[length(dates)]) {
    result = c(result, dates[length(dates)])
  }
  result
}

calculateAvgReturn = function(df) {
  result = NULL
  for (date in unique(df$Date)) {
    cur = df[df$Date == date,]
    cur = data.frame(Date=date, Return=mean(cur$Return))
    if (is.null(result)) {
      result = cur
    } else {
      result = rbind(result, cur)
    }
  }
  result
}

rebalancePortfolio = function(df, frequency_in_day) {
  dates_to_rebalance = rebalanceDates(df, frequency_in_day)
  assert(length(dates_to_rebalance) > 2)
  result = NULL
  for (i in 2:length(dates_to_rebalance)) {
    start_date = dates_to_rebalance[i - 1]
    end_date = dates_to_rebalance[i]
    cur_df = df[df$Date >= start_date & df$Date <= end_date,]
    date_res = NULL
    for (name in unique(cur_df$Name)) {
      cur_stock = cur_df[cur_df$Name == name,]
      cur_stock$Return = cur_stock$AdjClose / cur_stock[cur_stock$Date == start_date,]$Open
      if (is.null(date_res)) {
        date_res = cur_stock
      } else {
        date_res = rbind(date_res, cur_stock)
      }
    }
    if (is.null(result)) {
        result = date_res
      } else {
        last_chunk_return = mean(result[result$Date == max(result$Date),]$Return)
        date_res$Return = date_res$Return * last_chunk_return
        date_res = date_res[!(date_res$Date %in% result$Date),]
        result = rbind(result, date_res)
      }
  }

  result$Rebalancing = FALSE
  result[result$Date %in% dates_to_rebalance,]$Rebalancing = TRUE
  result[result$Date == dates_to_rebalance[1] | result$Date == dates_to_rebalance[length(dates_to_rebalance)],]$Rebalancing = FALSE
  result = calculateAvgReturn(result)
  result
}
rebalanced_by_year = rebalancePortfolio(all, avg_days_per_month * 12)
rebalanced_by_quarter = rebalancePortfolio(all, avg_days_per_month * 3)
rebalanced_by_month = rebalancePortfolio(all, avg_days_per_month)
rebalanced_by_week = rebalancePortfolio(all, round(avg_days_per_month / 4))
ggplot(calculateAvgReturn(all), aes(x=Date, y=Return)) + geom_line() + ggtitle("Total Return (Equal Weighted)")

ggplot(rebalanced_by_year, aes(x=Date, y=Return)) + geom_line() + ggtitle("Total Return (Equal Weighted, Rebalance Every Year)")

ggplot(rebalanced_by_quarter, aes(x=Date, y=Return)) + geom_line() + ggtitle("Total Return (Equal Weighted, Rebalance Every Quarter)")

ggplot(rebalanced_by_month, aes(x=Date, y=Return)) + geom_line() + ggtitle("Total Return (Equal Weighted, Rebalance Every Month)")

ggplot(rebalanced_by_week, aes(x=Date, y=Return)) + geom_line() + ggtitle("Total Return (Equal Weighted, Rebalance Every Week)")

Model Fit

Generalized Inverse Gaussian and Generalized Hyperbolic Q-Q Plot

HealthCare

qqgig(all_by_sector[all_by_sector$Sector == 'HealthCare',]$SectorReturn, c(1,2,3))

qqghyp(all_by_sector[all_by_sector$Sector == 'HealthCare',]$SectorReturn, c(2,2,1,2,2))

## IT

qqgig(all_by_sector[all_by_sector$Sector == 'IT',]$SectorReturn, c(1,2,3))

qqghyp(all_by_sector[all_by_sector$Sector == 'IT',]$SectorReturn, c(2,2,1,2,2))

RealEstate

qqgig(all_by_sector[all_by_sector$Sector == 'RealEstate',]$SectorReturn, c(1,2,3))

qqghyp(all_by_sector[all_by_sector$Sector == 'RealEstate',]$SectorReturn, c(2,2,1,2,2))

Volatility Trend

plotGARCH = function(name, x) {
  x = ts(x)
  garchSpec <- ugarchspec(
           variance.model=list(model="sGARCH",
                               garchOrder=c(1,1)),
           mean.model=list(armaOrder=c(0,0)), 
           distribution.model="std")
  garchFit <- ugarchfit(spec=garchSpec, data=x)
  print(paste(name, 'ARCH Model:'))
  print(coef(garchFit))
  rhat <- garchFit@fit$fitted.values
  plot.ts(rhat)
  title(paste(name, '- rhat'))
  hhat <- ts(garchFit@fit$sigma^2)
  plot.ts(hhat)
  title(paste(name, '- hhat'))
}

By Stock

plotGARCH('CRM', filter_df(all, 'CRM')$AdjClose)
## [1] "CRM ARCH Model:"
##          mu       omega      alpha1       beta1       shape 
## 73.58844854  1.17542616  0.97439739  0.02460243 99.99994819

plotGARCH('NVDA', filter_df(all, 'NVDA')$AdjClose)
## [1] "NVDA ARCH Model:"
##         mu      omega     alpha1      beta1      shape 
## 40.6527571  0.3968506  0.7038610  0.2951389 99.9994406

plotGARCH('AMR', filter_df(all, 'AMT')$AdjClose)
## [1] "AMR ARCH Model:"
##          mu       omega      alpha1       beta1       shape 
## 84.86030898  0.93398011  0.93162748  0.06736825 99.99956359

plotGARCH('PSA', filter_df(all, 'PSA')$AdjClose)
## [1] "PSA ARCH Model:"
##          mu       omega      alpha1       beta1       shape 
## 185.0929605   3.9641859   0.8833225   0.1156775  99.9998026

plotGARCH('BHC', filter_df(all, 'BHC')$AdjClose)
## [1] "BHC ARCH Model:"
##          mu       omega      alpha1       beta1       shape 
## 23.20123063  0.37879583  0.96317321  0.03582666 99.99993706

plotGARCH('PFE', filter_df(all, 'PFE')$AdjClose)
## [1] "PFE ARCH Model:"
##          mu       omega      alpha1       beta1       shape 
## 25.68564685  0.03728284  0.90422733  0.09477260 99.99994589

By Sector

plotGARCH('HealthCare', all_by_sector[all_by_sector$Sector == 'HealthCare',]$SectorReturn)
## [1] "HealthCare ARCH Model:"
##           mu        omega       alpha1        beta1        shape 
## 7.735796e-01 5.975940e-05 8.802751e-01 1.187249e-01 9.070121e+01

plotGARCH('IT', all_by_sector[all_by_sector$Sector == 'IT',]$SectorReturn)
## [1] "IT ARCH Model:"
##           mu        omega       alpha1        beta1        shape 
## 1.141722e+00 3.033594e-04 9.989998e-01 3.385377e-08 9.999994e+01

plotGARCH('RealEstate', all_by_sector[all_by_sector$Sector == 'RealEstate',]$SectorReturn)
## [1] "RealEstate ARCH Model:"
##           mu        omega       alpha1        beta1        shape 
## 1.294028e+00 1.140804e-04 9.346728e-01 6.432723e-02 1.000000e+02