# NOTE: the workspace is deliberately not wiped here. rm(list = ls())
# deletes whatever objects the caller had in the global environment but
# does not detach packages or reset options, so it does not give a truly
# clean slate. Run this script in a fresh R session instead.
library(corrplot)
## corrplot 0.84 loaded
# Read the data set from the working directory and preview its first rows.
df <- read.csv(file = "./volatility.csv")
head(df)

# Copy each column that the analysis uses into its own top-level vector,
# so the model formulas below can refer to the columns by bare name.

# *.PVol columns
AAPL.PVol <- df$AAPL.PVol
GOOG.PVol <- df$GOOG.PVol
AMZN.PVol <- df$AMZN.PVol
GSPC.PVol <- df$GSPC.PVol

# Response variable of every regression in this script
AAPL.Vol <- df$AAPL.Vol

# *.Close columns
AAPL.Close <- df$AAPL.Close
GOOG.Close <- df$GOOG.Close
AMZN.Close <- df$AMZN.Close
GSPC.Close <- df$GSPC.Close
VIX.Close <- df$VIX.Close

# VX*CLS columns
VXAPLCLS <- df$VXAPLCLS
VXAZNCLS <- df$VXAZNCLS
VXGOGCLS <- df$VXGOGCLS

# *.Ret columns
AAPL.Ret <- df$AAPL.Ret
AMZN.Ret <- df$AMZN.Ret
GOOG.Ret <- df$GOOG.Ret
GSPC.Ret <- df$GSPC.Ret

# Date column (used as a numeric regressor further down)
Date <- df$Date
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Covariance matrix of all series (cbind names each column after the
# vector it was built from).
m <- cov(cbind(
  AAPL.PVol, GOOG.PVol, AMZN.PVol, GSPC.PVol,
  VIX.Close, AAPL.Close, VXAPLCLS, VXAZNCLS,
  VXGOGCLS, GOOG.Close, AMZN.Close, GSPC.Close,
  Date, AAPL.Ret, AMZN.Ret, GOOG.Ret, GSPC.Ret, AAPL.Vol
))

# Flatten the matrix into (row, col, value) triples and keep only the
# AAPL.Vol column. Note that the value column is named `corr` although it
# holds covariances; the name is kept because it is written to the CSV.
ans <- data.frame(
  row = rownames(m)[row(m)],
  col = colnames(m)[col(m)],
  corr = as.vector(m)
) %>%
  filter(col == "AAPL.Vol")

# Save the table next to the script and print it.
write.csv(ans, file = "./cov.csv", row.names = FALSE)
ans
# Pearson correlation matrix of all series, drawn as a colour-coded grid.
corr_matrix <- cor(
  cbind(
    AAPL.PVol, GOOG.PVol, AMZN.PVol, GSPC.PVol,
    AAPL.Close, GOOG.Close, AMZN.Close, GSPC.Close,
    AAPL.Ret, GOOG.Ret, AMZN.Ret, GSPC.Ret,
    VXAPLCLS, VXGOGCLS, VXAZNCLS, VIX.Close,
    AAPL.Vol, Date
  ),
  method = "pearson"
)
corrplot(corr_matrix, method = "color")
# Descriptive statistics: print summary() for every series, in the same
# order as the recorded output below.
invisible(lapply(
  list(
    AAPL.PVol, GOOG.PVol, AMZN.PVol, GSPC.PVol,
    VXAPLCLS, VXAZNCLS, VXGOGCLS, VIX.Close,
    AAPL.Close, GOOG.Close, AMZN.Close, GSPC.Close,
    AAPL.Ret, AMZN.Ret, GOOG.Ret, GSPC.Ret,
    AAPL.Vol
  ),
  function(series) print(summary(series))
))
# Recorded output (one header line and one value line per series).
# AAPL.PVol
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.006714 0.011094 0.014523 0.016165 0.020046 0.034721
# GOOG.PVol
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.006429 0.010943 0.013973 0.015663 0.020833 0.028110
# AMZN.PVol
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00610 0.01079 0.01394 0.01699 0.02175 0.04279
# GSPC.PVol
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.003286 0.005072 0.007195 0.008519 0.011868 0.019970
# VXAPLCLS
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16.25 23.51 27.16 27.58 30.69 52.58
# VXAZNCLS
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.98 24.60 28.35 30.35 34.66 60.07
# VXGOGCLS
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 14.11 20.73 23.90 24.76 27.64 42.72
# VIX.Close
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.15 12.94 14.95 16.01 18.07 37.32
# AAPL.Close
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 35.55 43.88 48.15 49.67 53.87 73.41
# GOOG.Close
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 976.2 1085.2 1146.2 1150.9 1205.0 1361.2
# AMZN.Close
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1189 1606 1750 1716 1836 2040
# GSPC.Close
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2351 2724 2818 2830 2924 3240
# AAPL.Ret
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.104924 -0.006761 0.001644 0.001096 0.010182 0.068053
# AMZN.Ret
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.0814235 -0.0069531 0.0015277 0.0009095 0.0108153 0.0902540
# GOOG.Ret
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.0800893 -0.0065340 0.0006584 0.0004872 0.0098441 0.0993795
# GSPC.Ret
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.0418425 -0.0031881 0.0008367 0.0003763 0.0057131 0.0484032
# AAPL.Vol
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.006714 0.011161 0.014546 0.016221 0.020046 0.034721
# Fit a simple linear regression of AAPL.Vol on each independent
# variable in turn, and check the five key assumptions: linear
# relationship, multivariate normality, no multicollinearity,
# no auto-correlation, and homoscedasticity. Overall, the
# assumptions of simple linear regression are acceptable.
# AAPL.Vol regressed on AAPL.PVol. The model is fitted once and reused
# for the summary, the fitted line and the diagnostic plots.
mod <- lm(AAPL.Vol ~ AAPL.PVol)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ AAPL.PVol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.012131 -0.003853 -0.001856 0.003688 0.015031
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0103326 0.0007178 14.395 <2e-16 ***
## AAPL.PVol 0.3642590 0.0413424 8.811 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005874 on 501 degrees of freedom
## Multiple R-squared: 0.1342, Adjusted R-squared: 0.1324
## F-statistic: 77.63 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.PVol, AAPL.Vol)
abline(mod)
plot(mod)
# Transformed variant: response raised to the power 1.3, predictor as is.
X <- AAPL.PVol
Y <- AAPL.Vol^1.3
mod <- lm(formula = Y ~ X)
summary(mod)
##
## Call:
## lm(formula = Y ~ X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0047056 -0.0015163 -0.0007553 0.0013152 0.0063993
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0024677 0.0002809 8.786 <2e-16 ***
## X 0.1469616 0.0161769 9.085 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002298 on 501 degrees of freedom
## Multiple R-squared: 0.1414, Adjusted R-squared: 0.1397
## F-statistic: 82.53 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(X, Y)
abline(mod)
plot(mod)
# AAPL.Vol regressed on GOOG.PVol; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ GOOG.PVol)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ GOOG.PVol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010577 -0.004010 -0.001332 0.003647 0.016917
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0095353 0.0007591 12.561 <2e-16 ***
## GOOG.PVol 0.4268321 0.0455419 9.372 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005823 on 501 degrees of freedom
## Multiple R-squared: 0.1492, Adjusted R-squared: 0.1475
## F-statistic: 87.84 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(GOOG.PVol, AAPL.Vol)
abline(mod)
plot(mod)
# Working to transform the data
# plot(AAPL.PVol, resid(lm(AAPL.Vol~exp(AAPL.PVol))))
# plot(lm(AAPL.Vol ~ log(AAPL.PVol)))
# Transformed variant: cubed predictor, untransformed response.
X <- GOOG.PVol^3
Y <- AAPL.Vol
mod <- lm(formula = Y ~ X)
summary(mod)
##
## Call:
## lm(formula = Y ~ X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.011667 -0.004118 -0.001034 0.003294 0.017549
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.340e-02 3.636e-04 36.87 <2e-16 ***
## X 5.183e+02 4.794e+01 10.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005685 on 501 degrees of freedom
## Multiple R-squared: 0.1891, Adjusted R-squared: 0.1875
## F-statistic: 116.8 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(X, Y)
abline(mod)
plot(mod)
# abline(lm(AAPL.Vol~(AAPL.PVol^2)))
# AAPL.Vol regressed on AMZN.PVol; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ AMZN.PVol)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ AMZN.PVol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.011690 -0.003388 -0.001062 0.003772 0.012970
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0097048 0.0005251 18.48 <2e-16 ***
## AMZN.PVol 0.3836141 0.0275303 13.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005359 on 501 degrees of freedom
## Multiple R-squared: 0.2793, Adjusted R-squared: 0.2779
## F-statistic: 194.2 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AMZN.PVol, AAPL.Vol)
abline(mod)
plot(mod)
# Transformed variant: squared predictor, response raised to the power 1.3.
X <- AMZN.PVol^2
Y <- AAPL.Vol^1.3
mod <- lm(formula = Y ~ X)
summary(mod)
##
## Call:
## lm(formula = Y ~ X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0041362 -0.0013751 -0.0004717 0.0013413 0.0056002
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0035150 0.0001229 28.6 <2e-16 ***
## X 3.6507349 0.2295454 15.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002022 on 501 degrees of freedom
## Multiple R-squared: 0.3355, Adjusted R-squared: 0.3342
## F-statistic: 252.9 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(X, Y)
abline(mod)
plot(mod)
# AAPL.Vol regressed on GSPC.PVol; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ GSPC.PVol)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ GSPC.PVol)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010067 -0.004458 -0.001580 0.004240 0.016591
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0113798 0.0005934 19.177 <2e-16 ***
## GSPC.PVol 0.5683058 0.0625725 9.082 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.00585 on 501 degrees of freedom
## Multiple R-squared: 0.1414, Adjusted R-squared: 0.1397
## F-statistic: 82.49 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(GSPC.PVol, AAPL.Vol)
abline(mod)
plot(mod)
# Transformed variant: squared predictor, untransformed response.
X <- GSPC.PVol^2
Y <- AAPL.Vol
mod <- lm(formula = Y ~ X)
summary(mod)
##
## Call:
## lm(formula = Y ~ X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010616 -0.004461 -0.001523 0.004048 0.017018
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.362e-02 3.729e-04 36.512 <2e-16 ***
## X 2.897e+01 2.990e+00 9.687 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005794 on 501 degrees of freedom
## Multiple R-squared: 0.1578, Adjusted R-squared: 0.1561
## F-statistic: 93.84 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(X, Y)
abline(mod)
plot(mod)
# AAPL.Vol regressed on AAPL.Close; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ AAPL.Close)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ AAPL.Close)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010130 -0.003786 -0.001369 0.003280 0.017794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.635e-02 1.831e-03 14.391 < 2e-16 ***
## AAPL.Close -2.038e-04 3.645e-05 -5.593 3.68e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006125 on 501 degrees of freedom
## Multiple R-squared: 0.05877, Adjusted R-squared: 0.05689
## F-statistic: 31.28 on 1 and 501 DF, p-value: 3.678e-08
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ AAPL.Close)
abline(mod)
# Disabled alternative view: plot(AAPL.Close, resid(mod))
plot(mod)
# AAPL.Vol regressed on GOOG.Close; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ GOOG.Close)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ GOOG.Close)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0117719 -0.0033752 -0.0005929 0.0032927 0.0172550
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.845e-02 3.603e-03 13.446 <2e-16 ***
## GOOG.Close -2.801e-05 3.123e-06 -8.968 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.00586 on 501 degrees of freedom
## Multiple R-squared: 0.1383, Adjusted R-squared: 0.1366
## F-statistic: 80.43 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ GOOG.Close)
abline(mod)
# Disabled alternative view: plot(GOOG.Close, resid(mod))
plot(mod)
# AAPL.Vol regressed on AMZN.Close; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ AMZN.Close)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ AMZN.Close)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010157 -0.004862 -0.001207 0.003546 0.018764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.421e-02 2.784e-03 8.698 < 2e-16 ***
## AMZN.Close -4.658e-06 1.614e-06 -2.885 0.00408 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006261 on 501 degrees of freedom
## Multiple R-squared: 0.01635, Adjusted R-squared: 0.01438
## F-statistic: 8.325 on 1 and 501 DF, p-value: 0.004079
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ AMZN.Close)
abline(mod)
# Disabled alternative view: plot(AMZN.Close, resid(mod))
plot(mod)
# AAPL.Vol regressed on GSPC.Close; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ GSPC.Close)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ GSPC.Close)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.011231 -0.003555 -0.001053 0.004014 0.017945
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.590e-02 4.913e-03 11.378 < 2e-16 ***
## GSPC.Close -1.402e-05 1.733e-06 -8.088 4.59e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005937 on 501 degrees of freedom
## Multiple R-squared: 0.1155, Adjusted R-squared: 0.1137
## F-statistic: 65.42 on 1 and 501 DF, p-value: 4.594e-15
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ GSPC.Close)
abline(mod)
# Disabled alternative view: plot(GSPC.Close, resid(mod))
plot(mod)
##returns
# AAPL.Vol regressed on AAPL.Ret; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ AAPL.Ret)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ AAPL.Ret)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010232 -0.004573 -0.001546 0.003899 0.020859
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0162987 0.0002766 58.932 < 2e-16 ***
## AAPL.Ret -0.0709453 0.0158672 -4.471 9.63e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006191 on 501 degrees of freedom
## Multiple R-squared: 0.03837, Adjusted R-squared: 0.03645
## F-statistic: 19.99 on 1 and 501 DF, p-value: 9.629e-06
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ AAPL.Ret)
abline(mod)
# Disabled alternative view: plot(AAPL.Ret, resid(mod))
plot(mod)
# AAPL.Vol regressed on GOOG.Ret; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ GOOG.Ret)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ GOOG.Ret)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010034 -0.005098 -0.001675 0.003929 0.018778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0162340 0.0002809 57.79 <2e-16 ***
## GOOG.Ret -0.0267257 0.0170239 -1.57 0.117
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006297 on 501 degrees of freedom
## Multiple R-squared: 0.004895, Adjusted R-squared: 0.002909
## F-statistic: 2.465 on 1 and 501 DF, p-value: 0.1171
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ GOOG.Ret)
abline(mod)
# Disabled alternative view: plot(GOOG.Ret, resid(mod))
plot(mod)
# AAPL.Vol regressed on AMZN.Ret; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ AMZN.Ret)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ AMZN.Ret)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010004 -0.005099 -0.001600 0.003971 0.019868
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0162477 0.0002807 57.886 <2e-16 ***
## AMZN.Ret -0.0293726 0.0147407 -1.993 0.0468 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006288 on 501 degrees of freedom
## Multiple R-squared: 0.007863, Adjusted R-squared: 0.005883
## F-statistic: 3.971 on 1 and 501 DF, p-value: 0.04684
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ AMZN.Ret)
abline(mod)
# Disabled alternative view: plot(AMZN.Ret, resid(mod))
plot(mod)
# AAPL.Vol regressed on GSPC.Ret; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ GSPC.Ret)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ GSPC.Ret)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010180 -0.004948 -0.001665 0.004012 0.019477
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.016256 0.000279 58.274 < 2e-16 ***
## GSPC.Ret -0.092991 0.029554 -3.147 0.00175 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006251 on 501 degrees of freedom
## Multiple R-squared: 0.01938, Adjusted R-squared: 0.01742
## F-statistic: 9.9 on 1 and 501 DF, p-value: 0.001751
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ GSPC.Ret)
abline(mod)
# Disabled alternative view: plot(GSPC.Ret, resid(mod))
plot(mod)
# AAPL.Vol regressed on VXAPLCLS; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ VXAPLCLS)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ VXAPLCLS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0114745 -0.0036975 -0.0005408 0.0032929 0.0189228
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.550e-03 1.025e-03 -3.465 0.000576 ***
## VXAPLCLS 7.169e-04 3.635e-05 19.719 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004737 on 501 degrees of freedom
## Multiple R-squared: 0.437, Adjusted R-squared: 0.4359
## F-statistic: 388.9 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ VXAPLCLS)
abline(mod)
# Disabled alternative view: plot(VXAPLCLS, resid(mod))
plot(mod)
# AAPL.Vol regressed on VXGOGCLS; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ VXGOGCLS)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ VXGOGCLS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0117181 -0.0033167 -0.0002358 0.0028568 0.0172900
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.997e-03 9.787e-04 -3.062 0.00232 **
## VXGOGCLS 7.761e-04 3.861e-05 20.102 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004697 on 501 degrees of freedom
## Multiple R-squared: 0.4465, Adjusted R-squared: 0.4453
## F-statistic: 404.1 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ VXGOGCLS)
abline(mod)
# Disabled alternative view: plot(VXGOGCLS, resid(mod))
plot(mod)
# AAPL.Vol regressed on VXAZNCLS; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ VXAZNCLS)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ VXAZNCLS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0112543 -0.0029578 -0.0002878 0.0026793 0.0170604
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.448e-04 7.839e-04 -0.695 0.487
## VXAZNCLS 5.523e-04 2.497e-05 22.120 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.00449 on 501 degrees of freedom
## Multiple R-squared: 0.4941, Adjusted R-squared: 0.4931
## F-statistic: 489.3 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ VXAZNCLS)
abline(mod)
# Disabled alternative view: plot(VXAZNCLS, resid(mod))
plot(mod)
# AAPL.Vol regressed on VIX.Close; one fit shared by all the steps below.
mod <- lm(AAPL.Vol ~ VIX.Close)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ VIX.Close)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.015174 -0.004049 -0.001003 0.003370 0.018191
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.658e-03 1.007e-03 4.627 4.74e-06 ***
## VIX.Close 7.221e-04 6.093e-05 11.851 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005579 on 501 degrees of freedom
## Multiple R-squared: 0.2189, Adjusted R-squared: 0.2174
## F-statistic: 140.4 on 1 and 501 DF, p-value: < 2.2e-16
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ VIX.Close)
abline(mod)
# Disabled alternative view: plot(VIX.Close, resid(mod))
plot(mod)
# AAPL.Vol regressed on Date (treated as a numeric predictor); one fit
# shared by all the steps below.
mod <- lm(AAPL.Vol ~ Date)
summary(mod)
##
## Call:
## lm(formula = AAPL.Vol ~ Date)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010509 -0.004040 -0.001550 0.003462 0.018375
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.780e-02 5.556e-04 32.038 < 2e-16 ***
## Date -4.342e-06 1.322e-06 -3.285 0.00109 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006246 on 501 degrees of freedom
## Multiple R-squared: 0.02109, Adjusted R-squared: 0.01914
## F-statistic: 10.79 on 1 and 501 DF, p-value: 0.00109
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(AAPL.Vol ~ Date)
abline(mod)
# Disabled alternative view: plot(Date, resid(mod))
plot(mod)
# Transformed variant: squared Date as predictor, untransformed response.
X <- Date^2
Y <- AAPL.Vol
mod <- lm(formula = Y ~ X)
summary(mod)
##
## Call:
## lm(formula = Y ~ X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.010907 -0.003706 -0.001486 0.003309 0.017932
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.778e-02 4.115e-04 43.199 < 2e-16 ***
## X -8.802e-09 1.735e-09 -5.075 5.48e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.006157 on 501 degrees of freedom
## Multiple R-squared: 0.04889, Adjusted R-squared: 0.04699
## F-statistic: 25.75 on 1 and 501 DF, p-value: 5.482e-07
# Scatter plot with the fitted line, then the standard lm diagnostics.
plot(X, Y)
abline(mod)
plot(mod)
# It’s very hard to transform the data between AAPL volatility and the closing prices.
# AAPL vs closing prices: For most of the transformations in regard to Apple closing values, the transformations do not seem to make much of an impact on our data. One exception may be the first one, AAPL.Vol vs AAPL.Close, where the transformation does seem to make the residuals a bit more normal; however, they are still skewed heavily towards one side or the other.
# Compare the untransformed fit (c) with the log-response fit (d) of
# AAPL.Vol on AAPL.Close.
# `c` and `d` are kept as variable names because later code may read them;
# calls such as c(2, 2) still resolve to base::c, since R skips
# non-function bindings when it looks up a function name.
c <- lm(AAPL.Vol ~ AAPL.Close)
d <- lm(log(AAPL.Vol) ~ AAPL.Close)
### Some transforms of Appl Volatility vs Appl Close
par(mfrow = c(2, 2)) # <- Remove this line if you want graphs not grouped
# Residuals are drawn against the predictor itself so that the x-axis
# matches its "AAPL.Close" label (the raw residual plots previously used
# fitted values under that label).
plot(AAPL.Close, resid(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "AAPL.Close", ylab = "Residuals",
     main = "Apple Close versus Apple vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# These are the residuals of the log model, not logs of residuals.
plot(AAPL.Close, resid(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "AAPL.Close", ylab = "Residuals (log model)",
     main = "Apple Close versus Log Apple vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
plot(AAPL.Close, rstandard(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "AAPL.Close", ylab = "Standardized Residuals",
     main = "Apple close versus Apple vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
plot(AAPL.Close, rstandard(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "AAPL.Close", ylab = "Standardized Residuals",
     main = "Apple close versus Log Apple vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Residuals-vs-fitted diagnostic for each model.
plot(c, which = 1)
plot(d, which = 1)
# Compare the untransformed fit (c) with the square-root-response fit (d)
# of AAPL.Vol on AMZN.Close.
# `c` and `d` are kept as variable names because later code may read them.
c <- lm(AAPL.Vol ~ AMZN.Close)
d <- lm(sqrt(AAPL.Vol) ~ AMZN.Close)
### Some transforms of Appl Volatility vs AMZN Close
# par(mfrow = c(2, 2)) # <- Uncomment this line if you want graphs grouped
plot(AMZN.Close, resid(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "AMZN.Close", ylab = "Residuals",
     main = "AMZN Close versus Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# These are the residuals of the sqrt model, not square roots of residuals.
plot(AMZN.Close, resid(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "AMZN.Close", ylab = "Residuals (sqrt model)",
     main = "AMZN Close versus Sqrt Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
plot(AMZN.Close, rstandard(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "AMZN.Close", ylab = "Standardized Residuals",
     main = "AMZN Close versus Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
plot(AMZN.Close, rstandard(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "AMZN.Close", ylab = "Standardized Residuals",
     main = "AMZN Close versus Sqrt Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Residuals-vs-fitted diagnostic for each model.
plot(c, which = 1)
plot(d, which = 1)
# For Google, a transformation does not seem necessary: it barely makes any difference to the regressions, and the graphs barely change.
# Compare the untransformed fit (c) with the square-root-response fit (d)
# of AAPL.Vol on GOOG.Close.
# `c` and `d` are kept as variable names because later code may read them;
# calls such as c(2, 2) still resolve to base::c, since R skips
# non-function bindings when it looks up a function name.
c <- lm(AAPL.Vol ~ GOOG.Close)
d <- lm(sqrt(AAPL.Vol) ~ GOOG.Close)
### Some transforms of Appl Volatility vs Google Close
par(mfrow = c(2, 2)) # <- Remove this line if you want graphs not grouped
plot(GOOG.Close, resid(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "GOOG.Close", ylab = "Residuals",
     main = "Google Close versus Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# These are the residuals of the sqrt model, not square roots of residuals.
plot(GOOG.Close, resid(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "GOOG.Close", ylab = "Residuals (sqrt model)",
     main = "Google Close versus Sqrt Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
plot(GOOG.Close, rstandard(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "GOOG.Close", ylab = "Standardized Residuals",
     main = "Google Close versus Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
plot(GOOG.Close, rstandard(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "GOOG.Close", ylab = "Standardized Residuals",
     main = "Google Close versus Sqrt Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Residuals-vs-fitted diagnostic for each model.
plot(c, which = 1)
plot(d, which = 1)
# For the S&P 500, not much suggests that a transform changes much. The standardized residuals also reveal only a few outliers very close to 3. With a transform on the x-axis (to make up for the curve in the residual trendline) and on the y-axis in an attempt to make the residuals linear, their abnormality suggests that it is not any kind of linear relationship.
# Compare the untransformed fit (c) with a fit of sqrt(AAPL.Vol) on the
# squared S&P 500 close (d).
# `c` and `d` are kept as variable names because later code may read them.
c <- lm(AAPL.Vol ~ GSPC.Close)
# Inside a model formula `^` is a formula operator (term crossing), so
# `GSPC.Close^2` reduces to plain `GSPC.Close`. Wrapping the term in I()
# makes R square the values arithmetically, which is what the plot titles
# below ("S&P 500^2") describe.
d <- lm(sqrt(AAPL.Vol) ~ I(GSPC.Close^2))
### Some transforms of Appl Volatility vs S&P 500 Close
# par(mfrow = c(2, 2)) # <- Uncomment this line if you want graphs grouped
plot(GSPC.Close, resid(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "GSPC.Close", ylab = "Residuals",
     main = "S&P 500 Close versus Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# These are the residuals of the sqrt model, not square roots of residuals.
plot(GSPC.Close, resid(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "GSPC.Close", ylab = "Residuals (sqrt model)",
     main = "S&P 500^2 Close versus Sqrt Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
plot(GSPC.Close, rstandard(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "GSPC.Close", ylab = "Standardized Residuals",
     main = "S&P 500 Close versus Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
plot(GSPC.Close, rstandard(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "GSPC.Close", ylab = "Standardized Residuals",
     main = "S&P 500^2 Close versus Sqrt Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Residuals-vs-fitted diagnostic for each model.
plot(c, which = 1)
plot(d, which = 1)
# The more interesting variables we have are the VIX variables, as they have the highest correlation with the predicted AAPL.Vol. This makes sense, because the VIX is calculated to help predict stock volatility using options and other macroeconomic factors, rather than directly from a stock’s return or close price.
# Compare the untransformed fit (c) with the log-response fit (d) of
# AAPL.Vol on VIX.Close.
# `c` and `d` are kept as variable names because later code may read them.
c <- lm(AAPL.Vol ~ VIX.Close)
d <- lm(log(AAPL.Vol) ~ VIX.Close)
### Some transforms of Appl Volatility vs VIX Close
# par(mfrow = c(2, 2)) # <- Uncomment this line if you want graphs grouped
plot(VIX.Close, resid(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "VIX.Close", ylab = "Residuals",
     main = "VIX Close versus Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Model d uses log(AAPL.Vol), so the labels say "log" (the title used to
# say "Sqrt"); the y-axis shows residuals of that model.
plot(VIX.Close, resid(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "VIX.Close", ylab = "Residuals (log model)",
     main = "VIX Close versus Log Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Residuals-vs-fitted diagnostic for each model.
plot(c, which = 1)
plot(d, which = 1)
# After the log(AAPL.Vol) transform, the residuals look much closer to a normal distribution, though they are not uniformly spread across the graph. Still, a logarithmic transformation of the Y-value certainly improved our model, and it raises the possibility of using a transform when analyzing our correlations to perhaps get a better model.
# Compare the untransformed fit (c) with the log-response fit (d) of
# AAPL.Vol on VXAPLCLS.
# `c` and `d` are kept as variable names because later code may read them.
c <- lm(AAPL.Vol ~ VXAPLCLS)
d <- lm(log(AAPL.Vol) ~ VXAPLCLS)
### Some transforms of Appl Volatility vs VIX Apple
# par(mfrow = c(2, 2)) # <- Uncomment this line if you want graphs grouped
plot(VXAPLCLS, resid(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "VXAPLCLS", ylab = "Residuals",
     main = "VIX Apple versus Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Model d uses log(AAPL.Vol), so the labels say "log" (the title used to
# say "Sqrt"); the y-axis shows residuals of that model.
plot(VXAPLCLS, resid(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "VXAPLCLS", ylab = "Residuals (log model)",
     main = "VIX Apple versus Log Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Residuals-vs-fitted diagnostic for each model.
plot(c, which = 1)
plot(d, which = 1)
# The log transformation of AAPL.Vol against the Apple VIX (VXAPLCLS) gives an even better result. The values aren't as left-skewed, and the model better reflects the linear relationship between the two variables.
# Compare the untransformed fit (c) with the log-response fit (d) of
# AAPL.Vol on VXAZNCLS.
# `c` and `d` are kept as variable names because later code may read them.
c <- lm(AAPL.Vol ~ VXAZNCLS)
d <- lm(log(AAPL.Vol) ~ VXAZNCLS)
### Some transforms of Appl Volatility vs VIX AMZN
# par(mfrow = c(2, 2)) # <- Uncomment this line if you want graphs grouped
plot(VXAZNCLS, resid(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "VXAZNCLS", ylab = "Residuals",
     main = "VIX AMZN versus Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Model d uses log(AAPL.Vol), so the labels say "log" (the title used to
# say "Sqrt"); the y-axis shows residuals of that model.
plot(VXAZNCLS, resid(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "VXAZNCLS", ylab = "Residuals (log model)",
     main = "VIX AMZN versus Log Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Residuals-vs-fitted diagnostic for each model.
plot(c, which = 1)
plot(d, which = 1)
# The same logarithmic transform also works very well to better serve our linear model. The results are much more normal than without a transform.
# Compare the untransformed fit (c) with the log-response fit (d) of
# AAPL.Vol on VXGOGCLS.
# `c` and `d` are kept as variable names because later code may read them.
c <- lm(AAPL.Vol ~ VXGOGCLS)
d <- lm(log(AAPL.Vol) ~ VXGOGCLS)
### Some transforms of Appl Volatility vs VIX GOOG
# par(mfrow = c(2, 2)) # <- Uncomment this line if you want graphs grouped
plot(VXGOGCLS, resid(c), col = alpha("grey36", 0.5), pch = 20,
     xlab = "VXGOGCLS", ylab = "Residuals",
     main = "VIX GOOG versus Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Model d uses log(AAPL.Vol), so the labels say "log" (the title used to
# say "Sqrt"); the y-axis shows residuals of that model.
plot(VXGOGCLS, resid(d), col = alpha("grey36", 0.5), pch = 20,
     xlab = "VXGOGCLS", ylab = "Residuals (log model)",
     main = "VIX GOOG versus Log Appl Vol Residuals")
abline(h = 0, col = "darkorange", lwd = 2)
# Residuals-vs-fitted diagnostic for each model.
plot(c, which = 1)
plot(d, which = 1)
# Conclusion: Using a logarithmic transformation in our models with the VIX variables would better show a much more linear relationship in our data.