Stochastic Optimization: Team Project 2

Nalini Agrawal: NA23633
Holly Jones: haj372
Kartik Sharma: ks44273
Carlton Washburn: cmw3646



1. Calculate the daily returns for each stock using the 2012 price data.

# Question 1
# Daily return matrix: for every stock (column of priceMat), the day-over-day
# price change divided by the previous day's price. Each stock therefore has
# one observation fewer than it has prices.
daily_return = apply(priceMat, 2, function(prices) {
  diff(prices) / head(prices, -1)
})



2. As our initial candidate for the similarity matrix, find the correlation matrix for the returns of the 100 stocks. Note that there will be missing data in the price matrix (NA which stands for Not Available). You need to specify ‘use’ argument in the ‘cor’ function in order to handle NAs.

# Question 2
# Correlation matrix of the daily returns, used as the initial similarity
# matrix. "pairwise.complete.obs" computes each pairwise correlation from the
# days on which both stocks have a return, which is how the NAs are handled.
rho = cor(x = daily_return, use = "pairwise.complete.obs")



3. Code the integer program above as another function that returns the weight of each stock in your portfolio: weights = constructFund(rho, q, priceMat, sharesMat, unique_tickers, unique_dates)

This will amount to simply formulating the integer program, solving it and then using the market capitalization of each company on the last date to compute weights. The output weights will be a vector of size n with only q non-zero elements denoting the weights.

library(lpSolve)

# Question 3
# Integer program that selects q representative stocks out of n.
#
# Decision vector (length n*n + n):
#   position (i-1)*n + j  -> x[i, j] = 1 if stock j represents stock i
#   position n*n + j      -> y[j]    = 1 if stock j is held in the fund
#
# Every dimension below is derived from n, so the objective, right-hand side
# and constraint matrix cannot drift out of sync with the size of rho.
n = ncol(rho)
q = 25

#Objective function: total similarity between each stock and its
#representative. t(rho) is flattened so that position (i-1)*n + j carries
#rho[i, j], matching the x layout above; the y variables contribute nothing.
obj = c(t(rho), rep(0, n))

b = c(q, rep(1, n), rep(0, n * n))     #rhs constraint

#constraint 1: exactly q stocks are selected (sum of y = q)
row1 = c(rep(0, n * n), rep(1, n))

#constraint 2: every stock i has exactly one representative (sum_j x[i, j] = 1)
mat2 = matrix(0, n, n * n + n)
for (x in seq_len(n))
{
  mat2[x, ((x - 1) * n + 1):(x * n)] = 1
}

#constraint 3: stock j can only represent others if it is selected
#(x[i, j] - y[j] <= 0). Filling by row recycles the rows of -I, so row k of
#neg1mat has its -1 in column ((k - 1) mod n) + 1, i.e. the j of x[i, j].
diag1 = diag(1, n * n, n * n)
diagneg1 = diag(-1, n, n)
neg1mat = matrix(diagneg1, n * n, n, byrow = TRUE)
cons3 = cbind(diag1, neg1mat)

A = rbind(row1, mat2, cons3)

dir = c(rep("=", n + 1), rep("<=", n * n))

sol = lp("max", obj, A, dir, b, all.bin = TRUE)

# 0 means the solver found an optimal solution
sol$status
## [1] 0
#sol$solution



4. Use your weights to construct an index portfolio at the end of 2012.

Compare how this index portfolio performs monthly in 2013 as compared to the NASDAQ 100 index using the 2013 stock data provided. Here you may assume that you can directly invest in the Index as if it is a stock. Present your findings using any visualizations or tabulations. You can assume that you will be investing 1 million in your fund. In this case, your shares of each stock you choose to construct the portfolio should be large. As a result, you can leave the shares as non-integers, because the effect that the non-integer parts of shares have should be marginal.

#Question 3 continued

#Construct Fund Function
#=============================================
# Build an index-tracking fund of q stocks and return its portfolio weights.
#
# Solves the integer program
#   maximize   sum_ij rho[i, j] * x[i, j]
#   subject to sum_j y[j]    = q
#              sum_j x[i, j] = 1      for every stock i
#              x[i, j] <= y[j]        for every pair (i, j)
#              x, y binary
# where x[i, j] = 1 means stock j represents stock i and y[j] = 1 means stock j
# is held. The weight of stock j is the market capitalisation (price * shares
# on the last row of priceMat / sharesMat) of all the stocks it represents,
# normalised so that the weights sum to 1.
#
# Arguments:
#   rho             n x n similarity matrix (n = number of columns of priceMat)
#   q               number of stocks to hold, between 1 and n
#   priceMat        price matrix, one column per stock; the last row is used
#   sharesMat       share-count matrix with the same column layout as priceMat
#   unique_tickers  not used; kept so the call signature matches the assignment
#   unique_dates    not used; kept so the call signature matches the assignment
#
# Returns a numeric vector of length n that sums to 1 and is zero for every
# stock that is not selected. Requires lp() from the lpSolve package.
constructFund = function(rho,q,priceMat,sharesMat,unique_tickers,unique_dates){

  n = ncol(priceMat)
  n_x = n * n            # number of assignment variables x[i, j]
  rho = as.matrix(rho)

  stopifnot(
    nrow(rho) == n, ncol(rho) == n,
    ncol(sharesMat) == n,
    nrow(sharesMat) >= nrow(priceMat),
    length(q) == 1, q >= 1, q <= n
  )

  # x[i, j] is stored at position (i-1)*n + j, so flatten t(rho) to put
  # rho[i, j] on x[i, j]; the y variables have no objective contribution.
  obj = c(t(rho), rep(0, n))

  b = c(q, rep(1, n), rep(0, n_x)) #rhs constraint

  # constraint 1: exactly q stocks are selected
  pick_q = c(rep(0, n_x), rep(1, n))

  # constraint 2: every stock i has exactly one representative
  assign_once = matrix(0, n, n_x + n)
  for (i in seq_len(n)) {
    assign_once[i, ((i - 1) * n + 1):(i * n)] = 1
  }

  # constraint 3: x[i, j] - y[j] <= 0. Filling by row recycles the rows of -I,
  # so row k gets its -1 in column ((k - 1) mod n) + 1, the j of x[i, j].
  link = cbind(diag(1, n_x, n_x),
               matrix(diag(-1, n, n), n_x, n, byrow = TRUE))

  A = rbind(pick_q, assign_once, link)

  dir = c(rep("=", n + 1), rep("<=", n_x))

  sol = lp("max", obj, A, dir, b, all.bin = TRUE)
  if (sol$status != 0) {
    stop("constructFund: integer program was not solved (lp status ",
         sol$status, ")", call. = FALSE)
  }

  # Market capitalisation of every stock on the last available date
  last_day = nrow(priceMat)
  V = as.numeric(priceMat[last_day, ]) * as.numeric(sharesMat[last_day, ])
  if (anyNA(V)) {
    warning("constructFund: missing market capitalisation on the last date ",
            "treated as 0", call. = FALSE)
    V[is.na(V)] = 0
  }

  # Row i of assign_mat holds x[i, ], so column j marks the stocks that stock j
  # represents.
  assign_mat = matrix(sol$solution[seq_len(n_x)], n, n, byrow = TRUE)

  # w[j] = sum_i V[i] * x[i, j]; V is recycled down the rows of assign_mat.
  w = colSums(assign_mat * V)
  if (sum(w) <= 0) {
    stop("constructFund: total market capitalisation of the fund is zero",
         call. = FALSE)
  }

  weights = w / sum(w)
  return (weights)
}
#==================================================================================
# Evaluate the correlation-based fund on the 2013 monthly prices.
weights = constructFund(rho, 25, priceMat, sharesMat, unique_tickers, unique_dates) 
invested = weights*1000000

# Buy every held stock at its own first-month price (row 1 of monthlyPriceMat).
# Only stocks with a positive weight are used, so a missing price for a stock
# that is not held cannot turn the portfolio totals into NA.
held = which(weights > 0)
shares = numeric(length(weights))
shares[held] = invested[held] / monthlyPriceMat[1, held]

# Month-over-month change in the dollar value of the portfolio
monthly_returns = diff(monthlyPriceMat[, held, drop = FALSE]) %*% shares[held]

# NOTE(review): these look like NASDAQ-100 index levels, not dollar changes, so
# the portfolio and NASDAQ columns are not on the same scale and cumsum(nasdaq)
# adds up levels. Confirm, and convert both series to a common basis (e.g.
# percentage returns) before drawing conclusions from the comparison.
nasdaq = c(2731.53, 2738.58, 2818.69, 2887.44, 2981.76,2909.60, 3090.19, 3073.81, 3218.20, 3377.73, 3487.82)
cum_monthly = cumsum(monthly_returns)
cum_nasdaq = cumsum(nasdaq)

month = seq(1,11)

df = data.frame(month,monthly_returns,nasdaq,cum_monthly,cum_nasdaq)
df
##    month monthly_returns  nasdaq cum_monthly cum_nasdaq
## 1      1        2898.417 2731.53    2898.417    2731.53
## 2      2        7264.726 2738.58   10163.143    5470.11
## 3      3        3601.769 2818.69   13764.912    8288.80
## 4      4        7434.611 2887.44   21199.523   11176.24
## 5      5       -3601.354 2981.76   17598.169   14158.00
## 6      6       10095.545 2909.60   27693.715   17067.60
## 7      7       -2846.780 3090.19   24846.934   20157.79
## 8      8        7475.376 3073.81   32322.311   23231.60
## 9      9        2553.714 3218.20   34876.024   26449.80
## 10    10        8962.459 3377.73   43838.483   29827.53
## 11    11        4250.521 3487.82   48089.004   33315.35
#VISUALIZATION
#===================================================================
library(ggplot2)
# Monthly series as points: NASDAQ column vs. portfolio column of df, by month
gg = ggplot(df) +
  geom_point(aes(month, nasdaq, color = "NASDAQ")) +
  geom_point(aes(month, monthly_returns, color = "Portfolio")) +
  scale_color_discrete(name = "Index") +
  labs(y = "Returns", title = "Monthly Returns")
gg

plot of chunk unnamed-chunk-5

# Cumulative series as points: running totals of both df columns, by month
gg2 = ggplot(df) +
  geom_point(aes(month, cum_nasdaq, color = "NASDAQ")) +
  geom_point(aes(month, cum_monthly, color = "Portfolio")) +
  scale_color_discrete(name = "Index") +
  labs(y = "Returns", title = "Cumulative Returns")
gg2

plot of chunk unnamed-chunk-5

# Monthly series as lines: NASDAQ column vs. portfolio column of df, by month
gg3 = ggplot(df) +
  geom_line(aes(month, nasdaq, color = "NASDAQ")) +
  geom_line(aes(month, monthly_returns, color = "Portfolio")) +
  scale_color_discrete(name = "Index") +
  labs(y = "Returns", title = "Monthly Returns")
gg3

plot of chunk unnamed-chunk-5

# Cumulative series as lines: running totals of both df columns, by month
gg4 = ggplot(df) +
  geom_line(aes(month, cum_nasdaq, color = "NASDAQ")) +
  geom_line(aes(month, cum_monthly, color = "Portfolio")) +
  scale_color_discrete(name = "Index") +
  labs(y = "Returns", title = "Cumulative Returns")
gg4

plot of chunk unnamed-chunk-5

5. Earlier you used correlation as the similarity measure. Now instead create your own similarity measure and put it in a function similarityMat that has the same inputs and outputs:

rho = similarityMat(priceMat, sharesMat, unique_tickers,unique_dates)

Use this rho in your function call to constructFund and as in step 4, evaluate the performance of this fund as well.

The similarity measure we used to complete this problem was cosine similarity.

#install.packages("lsa")
library(lsa)

# Similarity measure for step 5: cosine similarity between the price columns
# of every pair of stocks, computed with cosine() from the lsa package.
#
# Arguments mirror the interface requested in the assignment. Only priceMat is
# used; sharesMat, unique_tickers and unique_dates are accepted so the call
# signature matches.
#
# Returns the matrix produced by cosine(priceMat) with every NA entry replaced
# by 0, so that the result can be passed to constructFund as rho.
similarityMat = function(priceMat, sharesMat, unique_tickers, unique_dates) {
  sim = cosine(priceMat)
  sim[is.na(sim)] = 0
  sim
}

rho1 = similarityMat(priceMat, sharesMat, unique_tickers, unique_dates)
weights_cosine = constructFund(rho1,25,priceMat,sharesMat,unique_tickers,unique_dates)

#================================== 
# Evaluate the cosine-similarity fund on the 2013 monthly prices.
invested1 = weights_cosine*1000000

# Buy every held stock at its own first-month price (row 1 of monthlyPriceMat).
# Only stocks with a positive weight are used, so a missing price for a stock
# that is not held cannot turn the portfolio totals into NA.
held1 = which(weights_cosine > 0)
shares1 = numeric(length(weights_cosine))
shares1[held1] = invested1[held1] / monthlyPriceMat[1, held1]

# Month-over-month change in the dollar value of the portfolio
monthly_returns1 = diff(monthlyPriceMat[, held1, drop = FALSE]) %*% shares1[held1]

# NOTE(review): these look like NASDAQ-100 index levels, not dollar changes, so
# the portfolio and NASDAQ columns are not on the same scale and cumsum(nasdaq)
# adds up levels. Confirm, and convert both series to a common basis (e.g.
# percentage returns) before drawing conclusions from the comparison.
nasdaq = c(2731.53, 2738.58, 2818.69, 2887.44, 2981.76,2909.60, 3090.19, 3073.81, 3218.20, 3377.73, 3487.82)

cum_monthly1 = cumsum(monthly_returns1)
cum_nasdaq = cumsum(nasdaq)

month = seq(1,11)

df1 = data.frame(month, monthly_returns1, nasdaq, cum_monthly1, cum_nasdaq)
df1
##    month monthly_returns1  nasdaq cum_monthly1 cum_nasdaq
## 1      1        -7096.928 2731.53  -7096.92764    2731.53
## 2      2         1767.656 2738.58  -5329.27211    5470.11
## 3      3         1991.066 2818.69  -3338.20596    8288.80
## 4      4         3321.894 2887.44    -16.31232   11176.24
## 5      5         1373.179 2981.76   1356.86692   14158.00
## 6      6        -6966.584 2909.60  -5609.71757   17067.60
## 7      7        -2708.732 3090.19  -8318.44998   20157.79
## 8      8         4726.865 3073.81  -3591.58453   23231.60
## 9      9         4153.485 3218.20    561.90060   26449.80
## 10    10         6401.126 3377.73   6963.02700   29827.53
## 11    11         4926.827 3487.82  11889.85361   33315.35
#VISUALIZATIONS
#=============================
# Monthly series as points: NASDAQ column vs. portfolio column of df1, by month
gg5 = ggplot(df1) +
  geom_point(aes(month, nasdaq, color = "NASDAQ")) +
  geom_point(aes(month, monthly_returns1, color = "Portfolio")) +
  scale_color_discrete(name = "Index") +
  labs(y = "Returns", title = "Monthly Returns")
gg5

plot of chunk unnamed-chunk-6

# Cumulative series as points: running totals of both df1 columns, by month
gg6 = ggplot(df1) +
  geom_point(aes(month, cum_nasdaq, color = "NASDAQ")) +
  geom_point(aes(month, cum_monthly1, color = "Portfolio")) +
  scale_color_discrete(name = "Index") +
  labs(y = "Returns", title = "Cumulative Returns")
gg6

plot of chunk unnamed-chunk-6

# Monthly series as lines: NASDAQ column vs. portfolio column of df1, by month
gg7 = ggplot(df1) +
  geom_line(aes(month, nasdaq, color = "NASDAQ")) +
  geom_line(aes(month, monthly_returns1, color = "Portfolio")) +
  scale_color_discrete(name = "Index") +
  labs(y = "Returns", title = "Monthly Returns")
gg7

plot of chunk unnamed-chunk-6

# Cumulative series as lines: running totals of both df1 columns, by month
gg8 = ggplot(df1) +
  geom_line(aes(month, cum_nasdaq, color = "NASDAQ")) +
  geom_line(aes(month, cum_monthly1, color = "Portfolio")) +
  scale_color_discrete(name = "Index") +
  labs(y = "Returns", title = "Cumulative Returns")
gg8

plot of chunk unnamed-chunk-6