setwd("C:/Users/x/Desktop/Homework/")
require(data.table)
## Loading required package: data.table
X <- fread("d-3stocks9908.txt")
fBasics::basicStats(X$axp)
## X..X.axp
## nobs 2515.000000
## NAs 0.000000
## Minimum -0.175949
## Maximum 0.179266
## 1. Quartile -0.011110
## 3. Quartile 0.010929
## Mean 0.000146
## Median -0.000182
## Sum 0.366319
## SE Mean 0.000488
## LCL Mean -0.000811
## UCL Mean 0.001102
## Variance 0.000598
## Stdev 0.024462
## Skewness -0.034606
## Kurtosis 6.048051
fBasics::basicStats(X$cat)
## X..X.cat
## nobs 2515.000000
## NAs 0.000000
## Minimum -0.145175
## Maximum 0.147229
## 1. Quartile -0.011442
## 3. Quartile 0.012061
## Mean 0.000595
## Median 0.000489
## Sum 1.496517
## SE Mean 0.000433
## LCL Mean -0.000253
## UCL Mean 0.001443
## Variance 0.000471
## Stdev 0.021696
## Skewness 0.011671
## Kurtosis 4.453264
fBasics::basicStats(X$sbux)
## X..X.sbux
## nobs 2515.000000
## NAs 0.000000
## Minimum -0.282862
## Maximum 0.146354
## 1. Quartile -0.012474
## 3. Quartile 0.012488
## Mean 0.000481
## Median -0.000512
## Sum 1.208550
## SE Mean 0.000535
## LCL Mean -0.000568
## UCL Mean 0.001529
## Variance 0.000720
## Stdev 0.026826
## Skewness -0.082427
## Kurtosis 8.745578
lgaxp <- log10(1+X$axp)
lgcat <- log10(1+X$cat)
lgsbux <- log10(1+X$sbux)
fBasics::basicStats(lgaxp)
## lgaxp
## nobs 2515.000000
## NAs 0.000000
## Minimum -0.084046
## Maximum 0.071612
## 1. Quartile -0.004852
## 3. Quartile 0.004721
## Mean -0.000067
## Median -0.000079
## Sum -0.168580
## SE Mean 0.000212
## LCL Mean -0.000484
## UCL Mean 0.000350
## Variance 0.000113
## Stdev 0.010653
## Skewness -0.336435
## Kurtosis 6.486498
fBasics::basicStats(lgcat)
## lgcat
## nobs 2515.000000
## NAs 0.000000
## Minimum -0.068123
## Maximum 0.059650
## 1. Quartile -0.004998
## 3. Quartile 0.005207
## Mean 0.000156
## Median 0.000212
## Sum 0.392651
## SE Mean 0.000188
## LCL Mean -0.000213
## UCL Mean 0.000525
## Variance 0.000089
## Stdev 0.009431
## Skewness -0.201745
## Kurtosis 4.694747
fBasics::basicStats(lgsbux)
## lgsbux
## nobs 2515.000000
## NAs 0.000000
## Minimum -0.144397
## Maximum 0.059319
## 1. Quartile -0.005452
## 3. Quartile 0.005390
## Mean 0.000052
## Median -0.000222
## Sum 0.129818
## SE Mean 0.000233
## LCL Mean -0.000406
## UCL Mean 0.000509
## Variance 0.000137
## Stdev 0.011708
## Skewness -0.597068
## Kurtosis 12.895473
t.test(lgaxp)
##
## One Sample t-test
##
## data: lgaxp
## t = -0.31555, df = 2514, p-value = 0.7524
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.0004835655 0.0003495056
## sample estimates:
## mean of x
## -6.702992e-05
hist(lgaxp, nclass = 40)
d1 <- density(lgaxp)
plot(d1$x,d1$y, type='l')
## 1.e
require(fBasics)
## Loading required package: fBasics
## Loading required package: timeDate
## Loading required package: timeSeries
normalTest(lgaxp,method="jb")
##
## Title:
## Jarque - Bera Normalality Test
##
## Test Results:
## STATISTIC:
## X-squared: 4466.8422
## P VALUE:
## Asymptotic p Value: < 2.2e-16
##
## Description:
## Fri Jul 12 13:51:04 2019 by user: x
shapiro.test(lgaxp)
##
## Shapiro-Wilk normality test
##
## data: lgaxp
## W = 0.92974, p-value < 2.2e-16
Z <- fread("m-gm3dx7508.txt")
fBasics::basicStats(Z$gm)
## X..Z.gm
## nobs 408.000000
## NAs 0.000000
## Minimum -0.389313
## Maximum 0.276619
## 1. Quartile -0.043488
## 3. Quartile 0.054502
## Mean 0.005568
## Median 0.006781
## Sum 2.271560
## SE Mean 0.004591
## LCL Mean -0.003457
## UCL Mean 0.014592
## Variance 0.008598
## Stdev 0.092727
## Skewness -0.383475
## Kurtosis 2.048076
fBasics::basicStats(Z$vw)
## X..Z.vw
## nobs 408.000000
## NAs 0.000000
## Minimum -0.225363
## Maximum 0.141600
## 1. Quartile -0.015835
## 3. Quartile 0.039953
## Mean 0.010118
## Median 0.013880
## Sum 4.128138
## SE Mean 0.002232
## LCL Mean 0.005731
## UCL Mean 0.014505
## Variance 0.002032
## Stdev 0.045075
## Skewness -0.742662
## Kurtosis 2.666032
fBasics::basicStats(Z$ew)
## X..Z.ew
## nobs 408.000000
## NAs 0.000000
## Minimum -0.272248
## Maximum 0.299260
## 1. Quartile -0.016842
## 3. Quartile 0.045644
## Mean 0.013314
## Median 0.016172
## Sum 5.432049
## SE Mean 0.002770
## LCL Mean 0.007868
## UCL Mean 0.018760
## Variance 0.003131
## Stdev 0.055959
## Skewness -0.300123
## Kurtosis 4.333664
fBasics::basicStats(Z$sp)
## X..Z.sp
## nobs 408.000000
## NAs 0.000000
## Minimum -0.217630
## Maximum 0.131767
## 1. Quartile -0.017624
## 3. Quartile 0.035984
## Mean 0.007301
## Median 0.010036
## Sum 2.978744
## SE Mean 0.002158
## LCL Mean 0.003058
## UCL Mean 0.011544
## Variance 0.001901
## Stdev 0.043599
## Skewness -0.570545
## Kurtosis 2.268600
lggm <- log10(1+Z$gm)
lgvw <- log10(1+Z$vw)
lgew <- log10(1+Z$ew)
lgsp <- log10(1+Z$sp)
fBasics::basicStats(lggm)
## lggm
## nobs 408.000000
## NAs 0.000000
## Minimum -0.214181
## Maximum 0.106061
## 1. Quartile -0.019310
## 3. Quartile 0.023047
## Mean 0.000479
## Median 0.002935
## Sum 0.195234
## SE Mean 0.002062
## LCL Mean -0.003575
## UCL Mean 0.004532
## Variance 0.001735
## Stdev 0.041650
## Skewness -1.023664
## Kurtosis 4.020752
fBasics::basicStats(lgvw)
## lgvw
## nobs 408.000000
## NAs 0.000000
## Minimum -0.110902
## Maximum 0.057514
## 1. Quartile -0.006932
## 3. Quartile 0.017014
## Mean 0.003928
## Median 0.005986
## Sum 1.602822
## SE Mean 0.000981
## LCL Mean 0.002001
## UCL Mean 0.005856
## Variance 0.000392
## Stdev 0.019806
## Skewness -1.051001
## Kurtosis 3.937548
fBasics::basicStats(lgew)
## lgew
## nobs 408.000000
## NAs 0.000000
## Minimum -0.138017
## Maximum 0.113696
## 1. Quartile -0.007376
## 3. Quartile 0.019384
## Mean 0.005068
## Median 0.006967
## Sum 2.067827
## SE Mean 0.001210
## LCL Mean 0.002690
## UCL Mean 0.007446
## Variance 0.000597
## Stdev 0.024433
## Skewness -0.836133
## Kurtosis 5.242452
fBasics::basicStats(lgsp)
## lgsp
## nobs 408.000000
## NAs 0.000000
## Minimum -0.106588
## Maximum 0.053757
## 1. Quartile -0.007722
## 3. Quartile 0.015353
## Mean 0.002744
## Median 0.004337
## Sum 1.119743
## SE Mean 0.000947
## LCL Mean 0.000884
## UCL Mean 0.004605
## Variance 0.000366
## Stdev 0.019119
## Skewness -0.854843
## Kurtosis 3.334693
t.test(lggm)
##
## One Sample t-test
##
## data: lggm
## t = 0.23206, df = 407, p-value = 0.8166
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003574954 0.004531983
## sample estimates:
## mean of x
## 0.0004785143
hist(lggm, nclass = 40)
d2 <- density(lggm)
plot(d2$x,d2$y, type='l')
## 2.e
require(fBasics)
normalTest(lggm,method="jb")
##
## Title:
## Jarque - Bera Normalality Test
##
## Test Results:
## STATISTIC:
## X-squared: 351.3549
## P VALUE:
## Asymptotic p Value: < 2.2e-16
##
## Description:
## Fri Jul 12 13:51:04 2019 by user: x
shapiro.test(lggm)
##
## Shapiro-Wilk normality test
##
## data: lggm
## W = 0.93851, p-value = 6.013e-12
t.test(Z$vw,mu=0, conf.level = 0.95)
##
## One Sample t-test
##
## data: Z$vw
## t = 4.5341, df = 407, p-value = 7.619e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.005731219 0.014504752
## sample estimates:
## mean of x
## 0.01011799
require(moments)
## Loading required package: moments
##
## Attaching package: 'moments'
## The following objects are masked from 'package:timeDate':
##
## kurtosis, skewness
agostino.test(Z$vw)
##
## D'Agostino skewness test
##
## data: Z$vw
## skew = -0.7454, z = -5.6340, p-value = 1.761e-08
## alternative hypothesis: data have a skewness
anscombe.test(Z$vw)
##
## Anscombe-Glynn kurtosis test
##
## data: Z$vw
## kurt = 5.6939, z = 5.5259, p-value = 3.278e-08
## alternative hypothesis: kurtosis is not equal to 3
agostino.test(lgaxp)
##
## D'Agostino skewness test
##
## data: lgaxp
## skew = -0.33664, z = -6.73182, p-value = 1.675e-11
## alternative hypothesis: data have a skewness
anscombe.test(lgaxp)
##
## Anscombe-Glynn kurtosis test
##
## data: lgaxp
## kurt = 9.494, z = 18.515, p-value < 2.2e-16
## alternative hypothesis: kurtosis is not equal to 3
S <- fread("d-exuseu.txt")
lgr <- diff(log10(S$VALUE))
fBasics::basicStats(lgr)
## lgr
## nobs 3566.000000
## NAs 0.000000
## Minimum -0.013042
## Maximum 0.020068
## 1. Quartile -0.001557
## 3. Quartile 0.001636
## Mean 0.000012
## Median 0.000000
## Sum 0.041353
## SE Mean 0.000047
## LCL Mean -0.000081
## UCL Mean 0.000104
## Variance 0.000008
## Stdev 0.002828
## Skewness 0.116750
## Kurtosis 2.058184
d3 <- density(lgr)
plot(d3$x,d3$y, type='l')
## 5.d
t.test(lgr)
##
## One Sample t-test
##
## data: lgr
## t = 0.24489, df = 3565, p-value = 0.8066
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -8.124509e-05 1.044378e-04
## sample estimates:
## mean of x
## 1.159635e-05
IBM <- fread("IBM.csv")
SP <- fread("SP.csv")
lgIBM <- diff(log10(IBM$IBM.Adjusted))
lgSP <- diff(log10(SP$SP.Adjusted))
Mdata <- data.frame(x = lgSP, y = lgIBM)
require(ggplot2)
## Loading required package: ggplot2
qplot(Mdata$x,Mdata$y)
## 6.c
require(MASS)
## Loading required package: MASS
m1 <- lm(Mdata$y ~ Mdata$x, Mdata)
summary(m1)
##
## Call:
## lm(formula = Mdata$y ~ Mdata$x, data = Mdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.037754 -0.002711 0.000156 0.002952 0.040514
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.797e-05 1.018e-04 0.766 0.444
## Mdata$x 2.314e-01 1.100e-02 21.045 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005681 on 3111 degrees of freedom
## Multiple R-squared: 0.1246, Adjusted R-squared: 0.1243
## F-statistic: 442.9 on 1 and 3111 DF, p-value: < 2.2e-16
AIC(m1)
## [1] -23353.64
m2 <- lm(Mdata$y ~ -1 + Mdata$x, Mdata)
summary(m2)
##
## Call:
## lm(formula = Mdata$y ~ -1 + Mdata$x, data = Mdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.037676 -0.002632 0.000234 0.003030 0.040590
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Mdata$x 0.2315 0.0110 21.05 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.005681 on 3112 degrees of freedom
## Multiple R-squared: 0.1247, Adjusted R-squared: 0.1244
## F-statistic: 443.2 on 1 and 3112 DF, p-value: < 2.2e-16
AIC(m2)
## [1] -23355.05
idx <- c(1:length(Mdata$y))[Mdata$y <= 0]
nsp <- rep(0,length(Mdata$y))
nsp[idx] = Mdata$y[idx]
c1 <- rep(0,length(Mdata$y))
c1[idx] = 1
xx <- data.frame(ibm = Mdata$y, sp = Mdata$x, c1, nsp)
head(xx)
## ibm sp c1 nsp
## 1 0.004619071 -0.022663940 0 0.000000000
## 2 -0.003949611 -0.011323076 1 -0.003949611
## 3 0.006548176 0.010409710 0 0.000000000
## 4 0.005107456 0.008039575 0 0.000000000
## 5 -0.005151428 -0.007354365 1 -0.005151428
## 6 -0.001055229 -0.003551851 1 -0.001055229
m3 <-lm(ibm ~ c1 + sp, xx)
summary(m3)
##
## Call:
## lm(formula = ibm ~ c1 + sp, data = xx)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.033694 -0.002193 0.000097 0.002365 0.039796
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0038645 0.0001082 35.73 <2e-16 ***
## c1 -0.0077809 0.0001576 -49.37 <2e-16 ***
## sp 0.1260899 0.0085083 14.82 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.004255 on 3110 degrees of freedom
## Multiple R-squared: 0.5092, Adjusted R-squared: 0.5089
## F-statistic: 1613 on 2 and 3110 DF, p-value: < 2.2e-16
AIC(m3)
## [1] -25152.93
m4 <-lm(ibm ~ nsp + sp, xx)
summary(m4)
##
## Call:
## lm(formula = ibm ~ nsp + sp, data = xx)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.005498 -0.002023 -0.000993 0.000962 0.041940
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.609e-03 6.868e-05 37.99 <2e-16 ***
## nsp 1.234e+00 1.618e-02 76.25 <2e-16 ***
## sp 9.560e-02 6.734e-03 14.20 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.003354 on 3110 degrees of freedom
## Multiple R-squared: 0.6949, Adjusted R-squared: 0.6947
## F-statistic: 3542 on 2 and 3110 DF, p-value: < 2.2e-16
AIC(m4)
## [1] -26633.11
m5 <-lm(ibm ~ sp + c1 + nsp, xx)
summary(m5)
##
## Call:
## lm(formula = ibm ~ sp + c1 + nsp, data = xx)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.005336 -0.001281 -0.000143 0.000469 0.041269
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.992e-03 7.438e-05 53.67 <2e-16 ***
## sp 7.115e-02 5.922e-03 12.02 <2e-16 ***
## c1 -3.958e-03 1.263e-04 -31.34 <2e-16 ***
## nsp 9.689e-01 1.644e-02 58.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.002925 on 3109 degrees of freedom
## Multiple R-squared: 0.7682, Adjusted R-squared: 0.768
## F-statistic: 3434 on 3 and 3109 DF, p-value: < 2.2e-16
AIC(m5)
## [1] -27485.93
The best model with the minimum AIC is m5 <- lm(ibm ~ sp + c1 + nsp).