## 'data.frame': 552 obs. of 4 variables:
## $ sd : num 6.48 11.84 11.69 10.73 11.21 ...
## $ sharpe : num 0.33 0.48 0.47 0.48 -0.51 -0.29 0.19 1.19 0.71 0.7 ...
## $ returns: num 7.95 13.62 13.45 12.85 -4.48 ...
## $ risk : Factor w/ 10 levels "1","10","2","3",..: 7 8 8 8 5 5 5 2 8 8 ...
Classification and regression tree with rpart
library(rpart) # library for decision tree
library(rpart.plot) # For decision tree visualization
## Warning: package 'rpart.plot' was built under R version 3.3.1
# Label every observation by its return: "bad" when negative, "average" when
# below 4.5, and "good" otherwise.
col <- ifelse(traindata$returns < 0, "bad",
              ifelse(traindata$returns < 4.5, "average", "good"))
subsetdata_results <- as.factor(col)

# Map each class to the colour advertised in the legend. Passing the factor
# itself as `col` would colour by its alphabetical level codes (average = 1,
# bad = 2, good = 3), i.e. palette colours that do not match the legend.
class_colours <- c(bad = "red", average = "blue", good = "green")
point_colours <- unname(class_colours[col])

# A factor on the x axis makes plot() draw box plots, where per-observation
# colours are meaningless. If `risk` is a factor, convert its labels back to
# numbers so that every row becomes one coloured point.
# NOTE(review): assumes the factor labels of `risk` are numeric strings.
risk_level <- if (is.factor(traindata$risk)) {
  as.numeric(as.character(traindata$risk))
} else {
  traindata$risk
}

# Axis titles are set with xlab/ylab (xtab/ytab are not graphical arguments).
plot(risk_level, traindata$returns, col = point_colours,
     xlab = "risk", ylab = "returns")
# Anchor the legend by keyword so it stays inside the plot region whatever the
# axis ranges turn out to be.
legend("topleft", legend = c("Bad", "Average", "Good"),
       col = c("red", "blue", "green"), pch = 1, bty = "n")

# Fix the random seed before fitting so the run is reproducible.
set.seed(1)
# Number of rows in the training data.
nr <- nrow(traindata)
# Fit a tree that predicts `returns` from `sharpe`; keep the model as tModel.
tModel <- rpart(formula = returns ~ sharpe, data = traindata)
# Draw the fitted tree.
rpart.plot(tModel)

# Make the prediction on the training data. predict() for rpart models takes
# the data through `newdata`; a `data =` argument is silently absorbed by `...`
# and ignored.
value <- predict(tModel, newdata = traindata)
# 5.5 Assess the model performance with the root mean square error (RMSE):
# the square root of the *mean* squared residual, so the figure is on the
# scale of `returns` and does not grow with the number of observations.
sqrt(mean((value - traindata$returns)^2))
## NOTE: the previously recorded value 110.0581 was sqrt(sum(residuals^2)),
## not the RMSE; re-run this chunk to refresh the output.
# Show the first 20 predictions next to the observed returns, the risk level
# and the residual (prediction minus observation).
head(data.frame(value, traindata$returns, traindata$risk, value - traindata$returns), 20)
## value traindata.returns traindata.risk value...traindata.returns
## 1 7.9451247 7.9463834 6 -0.001258648
## 2 12.0938781 13.6164251 7 -1.522547012
## 3 12.0938781 13.4467422 7 -1.352864052
## 4 12.0938781 12.8512525 7 -0.757374352
## 5 -3.5019811 -4.4790649 4 0.977083775
## 6 0.9866623 0.4670037 4 0.519658523
## 7 7.9451247 5.2725210 4 2.672603708
## 8 17.1488412 23.7793257 10 -6.630484579
## 9 12.0938781 13.7861174 7 -1.692239282
## 10 12.0938781 13.6265176 7 -1.532639502
## 11 12.0938781 13.9503928 8 -1.856514702
## 12 0.9866623 0.3357734 10 0.650888901
## 13 0.9866623 0.6491214 9 0.337540884
## 14 3.7707233 4.9189383 8 -1.148215062
## 15 0.9866623 -3.6008725 9 4.587534796
## 16 3.7707233 4.4046315 7 -0.633908256
## 17 3.7707233 4.3161354 7 -0.545412146
## 18 12.0938781 11.0406296 10 1.053248498
## 19 12.0938781 14.4442702 9 -2.350392072
## 20 12.0938781 14.0652860 9 -1.971407912
Linear Model
# 1. Fit a linear regression of `returns` on every column except `risk`
lmModel <- lm(formula = returns ~ . - risk, data = traindata)
# 2. Example predictions from the linear model (left disabled)
#predValue <- predict(lmModel, data.frame(sharpe = 0.3))
#predValue <- predict(lmModel, data.frame(sharpe = c(1,1.9)))
#predValue <- predict(lmModel, data.frame(sharpe = traindata$sharpe))
# 3. Inspect the estimated coefficients
coefficients(lmModel)
## (Intercept) sd sharpe
## 6.7036960 -0.1794602 13.8031113
# 5. Model performance assessment
# R-squared, read from the summary of the fitted linear model.
sumModel <- summary(lmModel)
sumModel[["r.squared"]]
## [1] 0.73707
# Correlation between the `returns` and `sharpe` columns on their own.
r <- with(traindata, cor(returns, sharpe))
r
## [1] 0.8515433