1

1.a

# Point the session at the folder that holds the homework data files; the
# fread() calls in this script use file names relative to this directory.
# NOTE(review): this absolute path is machine-specific -- adjust it, or run the
# script from the data folder, on any other machine.
setwd("C:/Users/x/Desktop/Homework/")
# library() stops with an error when data.table is not installed, whereas
# require() only returns FALSE and lets the script fail later at fread().
library(data.table)
# Read the three-stock file into X and print fBasics summary statistics
# (moments, quartiles, mean confidence limits) for its axp column.
X <- fread("d-3stocks9908.txt")
fBasics::basicStats(X$axp)
##                X..X.axp
## nobs        2515.000000
## NAs            0.000000
## Minimum       -0.175949
## Maximum        0.179266
## 1. Quartile   -0.011110
## 3. Quartile    0.010929
## Mean           0.000146
## Median        -0.000182
## Sum            0.366319
## SE Mean        0.000488
## LCL Mean      -0.000811
## UCL Mean       0.001102
## Variance       0.000598
## Stdev          0.024462
## Skewness      -0.034606
## Kurtosis       6.048051
# Summary statistics (fBasics::basicStats) for the cat column of X.
fBasics::basicStats(X$cat)
##                X..X.cat
## nobs        2515.000000
## NAs            0.000000
## Minimum       -0.145175
## Maximum        0.147229
## 1. Quartile   -0.011442
## 3. Quartile    0.012061
## Mean           0.000595
## Median         0.000489
## Sum            1.496517
## SE Mean        0.000433
## LCL Mean      -0.000253
## UCL Mean       0.001443
## Variance       0.000471
## Stdev          0.021696
## Skewness       0.011671
## Kurtosis       4.453264
# Summary statistics (fBasics::basicStats) for the sbux column of X.
fBasics::basicStats(X$sbux)
##               X..X.sbux
## nobs        2515.000000
## NAs            0.000000
## Minimum       -0.282862
## Maximum        0.146354
## 1. Quartile   -0.012474
## 3. Quartile    0.012488
## Mean           0.000481
## Median        -0.000512
## Sum            1.208550
## SE Mean        0.000535
## LCL Mean      -0.000568
## UCL Mean       0.001529
## Variance       0.000720
## Stdev          0.026826
## Skewness      -0.082427
## Kurtosis       8.745578

1.b

# Transform each column r of X to log10(1 + r), then summarise the axp series.
# NOTE(review): log10() is the base-10 logarithm; if continuously compounded
# (natural-log) returns are intended this should be log() -- confirm before
# changing, since the recorded statistics below depend on it.
lgaxp <- log10(1+X$axp)        
lgcat <- log10(1+X$cat)
lgsbux <- log10(1+X$sbux)
fBasics::basicStats(lgaxp)
##                   lgaxp
## nobs        2515.000000
## NAs            0.000000
## Minimum       -0.084046
## Maximum        0.071612
## 1. Quartile   -0.004852
## 3. Quartile    0.004721
## Mean          -0.000067
## Median        -0.000079
## Sum           -0.168580
## SE Mean        0.000212
## LCL Mean      -0.000484
## UCL Mean       0.000350
## Variance       0.000113
## Stdev          0.010653
## Skewness      -0.336435
## Kurtosis       6.486498
# Summary statistics for the log10-transformed cat series.
fBasics::basicStats(lgcat)
##                   lgcat
## nobs        2515.000000
## NAs            0.000000
## Minimum       -0.068123
## Maximum        0.059650
## 1. Quartile   -0.004998
## 3. Quartile    0.005207
## Mean           0.000156
## Median         0.000212
## Sum            0.392651
## SE Mean        0.000188
## LCL Mean      -0.000213
## UCL Mean       0.000525
## Variance       0.000089
## Stdev          0.009431
## Skewness      -0.201745
## Kurtosis       4.694747
# Summary statistics for the log10-transformed sbux series.
fBasics::basicStats(lgsbux)
##                  lgsbux
## nobs        2515.000000
## NAs            0.000000
## Minimum       -0.144397
## Maximum        0.059319
## 1. Quartile   -0.005452
## 3. Quartile    0.005390
## Mean           0.000052
## Median        -0.000222
## Sum            0.129818
## SE Mean        0.000233
## LCL Mean      -0.000406
## UCL Mean       0.000509
## Variance       0.000137
## Stdev          0.011708
## Skewness      -0.597068
## Kurtosis      12.895473

1.c

# Two-sided one-sample t-test of H0: mean(lgaxp) = 0 (t.test defaults).
t.test(lgaxp)  
## 
##  One Sample t-test
## 
## data:  lgaxp
## t = -0.31555, df = 2514, p-value = 0.7524
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.0004835655  0.0003495056
## sample estimates:
##     mean of x 
## -6.702992e-05

1.d

# Histogram of lgaxp; nclass = 40 requests roughly 40 bins.
hist(lgaxp, nclass = 40)

# Kernel density estimate of lgaxp, drawn as a line through the estimate's
# evaluation grid (d1$x) and density values (d1$y).
d1 <- density(lgaxp)
plot(d1$x,d1$y, type='l')

## 1.e

require(fBasics)              
## Loading required package: fBasics
## Loading required package: timeDate
## Loading required package: timeSeries
# Jarque-Bera normality test on lgaxp (fBasics::normalTest, method "jb").
normalTest(lgaxp,method="jb")
## 
## Title:
##  Jarque - Bera Normalality Test
## 
## Test Results:
##   STATISTIC:
##     X-squared: 4466.8422
##   P VALUE:
##     Asymptotic p Value: < 2.2e-16 
## 
## Description:
##  Fri Jul 12 13:51:04 2019 by user: x
# Shapiro-Wilk normality test on lgaxp as a second check.
shapiro.test(lgaxp)
## 
##  Shapiro-Wilk normality test
## 
## data:  lgaxp
## W = 0.92974, p-value < 2.2e-16

2

2.a

# Read the second data file into Z and print summary statistics for its gm
# column.
Z <- fread("m-gm3dx7508.txt")     
fBasics::basicStats(Z$gm)
##                X..Z.gm
## nobs        408.000000
## NAs           0.000000
## Minimum      -0.389313
## Maximum       0.276619
## 1. Quartile  -0.043488
## 3. Quartile   0.054502
## Mean          0.005568
## Median        0.006781
## Sum           2.271560
## SE Mean       0.004591
## LCL Mean     -0.003457
## UCL Mean      0.014592
## Variance      0.008598
## Stdev         0.092727
## Skewness     -0.383475
## Kurtosis      2.048076
# Summary statistics for the vw column of Z.
fBasics::basicStats(Z$vw)
##                X..Z.vw
## nobs        408.000000
## NAs           0.000000
## Minimum      -0.225363
## Maximum       0.141600
## 1. Quartile  -0.015835
## 3. Quartile   0.039953
## Mean          0.010118
## Median        0.013880
## Sum           4.128138
## SE Mean       0.002232
## LCL Mean      0.005731
## UCL Mean      0.014505
## Variance      0.002032
## Stdev         0.045075
## Skewness     -0.742662
## Kurtosis      2.666032
# Summary statistics for the ew column of Z.
fBasics::basicStats(Z$ew)
##                X..Z.ew
## nobs        408.000000
## NAs           0.000000
## Minimum      -0.272248
## Maximum       0.299260
## 1. Quartile  -0.016842
## 3. Quartile   0.045644
## Mean          0.013314
## Median        0.016172
## Sum           5.432049
## SE Mean       0.002770
## LCL Mean      0.007868
## UCL Mean      0.018760
## Variance      0.003131
## Stdev         0.055959
## Skewness     -0.300123
## Kurtosis      4.333664
# Summary statistics for the sp column of Z.
fBasics::basicStats(Z$sp)
##                X..Z.sp
## nobs        408.000000
## NAs           0.000000
## Minimum      -0.217630
## Maximum       0.131767
## 1. Quartile  -0.017624
## 3. Quartile   0.035984
## Mean          0.007301
## Median        0.010036
## Sum           2.978744
## SE Mean       0.002158
## LCL Mean      0.003058
## UCL Mean      0.011544
## Variance      0.001901
## Stdev         0.043599
## Skewness     -0.570545
## Kurtosis      2.268600

2.b

# Transform each column r of Z to log10(1 + r), then summarise the gm series.
# NOTE(review): log10() is the base-10 logarithm; if continuously compounded
# (natural-log) returns are intended this should be log() -- confirm before
# changing, since the recorded statistics below depend on it.
lggm <- log10(1+Z$gm)        
lgvw <- log10(1+Z$vw)
lgew <- log10(1+Z$ew)
lgsp <- log10(1+Z$sp)
fBasics::basicStats(lggm)
##                   lggm
## nobs        408.000000
## NAs           0.000000
## Minimum      -0.214181
## Maximum       0.106061
## 1. Quartile  -0.019310
## 3. Quartile   0.023047
## Mean          0.000479
## Median        0.002935
## Sum           0.195234
## SE Mean       0.002062
## LCL Mean     -0.003575
## UCL Mean      0.004532
## Variance      0.001735
## Stdev         0.041650
## Skewness     -1.023664
## Kurtosis      4.020752
# Summary statistics for the log10-transformed vw series.
fBasics::basicStats(lgvw)
##                   lgvw
## nobs        408.000000
## NAs           0.000000
## Minimum      -0.110902
## Maximum       0.057514
## 1. Quartile  -0.006932
## 3. Quartile   0.017014
## Mean          0.003928
## Median        0.005986
## Sum           1.602822
## SE Mean       0.000981
## LCL Mean      0.002001
## UCL Mean      0.005856
## Variance      0.000392
## Stdev         0.019806
## Skewness     -1.051001
## Kurtosis      3.937548
# Summary statistics for the log10-transformed ew series.
fBasics::basicStats(lgew)
##                   lgew
## nobs        408.000000
## NAs           0.000000
## Minimum      -0.138017
## Maximum       0.113696
## 1. Quartile  -0.007376
## 3. Quartile   0.019384
## Mean          0.005068
## Median        0.006967
## Sum           2.067827
## SE Mean       0.001210
## LCL Mean      0.002690
## UCL Mean      0.007446
## Variance      0.000597
## Stdev         0.024433
## Skewness     -0.836133
## Kurtosis      5.242452
# Summary statistics for the log10-transformed sp series.
fBasics::basicStats(lgsp)
##                   lgsp
## nobs        408.000000
## NAs           0.000000
## Minimum      -0.106588
## Maximum       0.053757
## 1. Quartile  -0.007722
## 3. Quartile   0.015353
## Mean          0.002744
## Median        0.004337
## Sum           1.119743
## SE Mean       0.000947
## LCL Mean      0.000884
## UCL Mean      0.004605
## Variance      0.000366
## Stdev         0.019119
## Skewness     -0.854843
## Kurtosis      3.334693

2.c

# Two-sided one-sample t-test of H0: mean(lggm) = 0 (t.test defaults).
t.test(lggm)
## 
##  One Sample t-test
## 
## data:  lggm
## t = 0.23206, df = 407, p-value = 0.8166
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.003574954  0.004531983
## sample estimates:
##    mean of x 
## 0.0004785143

2.d

# Histogram of lggm; nclass = 40 requests roughly 40 bins.
hist(lggm, nclass = 40)

# Kernel density estimate of lggm, drawn as a line through the estimate's
# evaluation grid (d2$x) and density values (d2$y).
d2 <- density(lggm)
plot(d2$x,d2$y, type='l')

## 2.e

require(fBasics)               
# Jarque-Bera normality test on lggm (fBasics::normalTest, method "jb").
normalTest(lggm,method="jb")
## 
## Title:
##  Jarque - Bera Normalality Test
## 
## Test Results:
##   STATISTIC:
##     X-squared: 351.3549
##   P VALUE:
##     Asymptotic p Value: < 2.2e-16 
## 
## Description:
##  Fri Jul 12 13:51:04 2019 by user: x
# Shapiro-Wilk normality test on lggm as a second check.
shapiro.test(lggm)
## 
##  Shapiro-Wilk normality test
## 
## data:  lggm
## W = 0.93851, p-value = 6.013e-12

3

3.a

# Two-sided one-sample t-test of H0: mean(Z$vw) = 0, reporting a 95%
# confidence interval for the mean.
t.test(Z$vw,mu=0, conf.level = 0.95)   
## 
##  One Sample t-test
## 
## data:  Z$vw
## t = 4.5341, df = 407, p-value = 7.619e-06
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.005731219 0.014504752
## sample estimates:
##  mean of x 
## 0.01011799

3.b

require(moments)
## Loading required package: moments
## 
## Attaching package: 'moments'
## The following objects are masked from 'package:timeDate':
## 
##     kurtosis, skewness
# D'Agostino test for skewness of Z$vw (agostino.test from package moments).
agostino.test(Z$vw) 
## 
##  D'Agostino skewness test
## 
## data:  Z$vw
## skew = -0.7454, z = -5.6340, p-value = 1.761e-08
## alternative hypothesis: data have a skewness

3.c

# Anscombe-Glynn test of H0: kurtosis of Z$vw equals 3 (package moments).
anscombe.test(Z$vw)  
## 
##  Anscombe-Glynn kurtosis test
## 
## data:  Z$vw
## kurt = 5.6939, z = 5.5259, p-value = 3.278e-08
## alternative hypothesis: kurtosis is not equal to 3

4

4.a

# D'Agostino test for skewness of lgaxp (package moments).
agostino.test(lgaxp)
## 
##  D'Agostino skewness test
## 
## data:  lgaxp
## skew = -0.33664, z = -6.73182, p-value = 1.675e-11
## alternative hypothesis: data have a skewness

4.b

# Anscombe-Glynn test of H0: kurtosis of lgaxp equals 3 (package moments).
anscombe.test(lgaxp)
## 
##  Anscombe-Glynn kurtosis test
## 
## data:  lgaxp
## kurt = 9.494, z = 18.515, p-value < 2.2e-16
## alternative hypothesis: kurtosis is not equal to 3

5

5.a

# Read d-exuseu.txt into S and compute lgr as the first difference of
# log10(VALUE), i.e. the change in the base-10 log between consecutive rows.
# NOTE(review): log10() is base 10; if natural-log returns are intended this
# should be log() -- confirm, the recorded statistics below depend on it.
S <- fread("d-exuseu.txt")
lgr <- diff(log10(S$VALUE))

5.b

# Summary statistics for the differenced log10 series lgr.
fBasics::basicStats(lgr)
##                     lgr
## nobs        3566.000000
## NAs            0.000000
## Minimum       -0.013042
## Maximum        0.020068
## 1. Quartile   -0.001557
## 3. Quartile    0.001636
## Mean           0.000012
## Median         0.000000
## Sum            0.041353
## SE Mean        0.000047
## LCL Mean      -0.000081
## UCL Mean       0.000104
## Variance       0.000008
## Stdev          0.002828
## Skewness       0.116750
## Kurtosis       2.058184

5.c

# Kernel density estimate of lgr, drawn as a line through the estimate's
# evaluation grid (d3$x) and density values (d3$y).
d3 <- density(lgr)
plot(d3$x,d3$y, type='l')

## 5.d

# Two-sided one-sample t-test of H0: mean(lgr) = 0 (t.test defaults).
t.test(lgr)
## 
##  One Sample t-test
## 
## data:  lgr
## t = 0.24489, df = 3565, p-value = 0.8066
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -8.124509e-05  1.044378e-04
## sample estimates:
##    mean of x 
## 1.159635e-05

6

6.a

# Read the IBM and SP files and turn each Adjusted column into a series of
# first differences of its base-10 log (change between consecutive rows).
# NOTE(review): log10() is base 10; if natural-log returns are intended this
# should be log() -- confirm, the recorded regression output depends on it.
# NOTE(review): the two series are later paired row by row, which presumably
# requires both files to cover the same dates in the same order -- verify.
IBM <- fread("IBM.csv")
SP <- fread("SP.csv")
lgIBM <- diff(log10(IBM$IBM.Adjusted))
lgSP <- diff(log10(SP$SP.Adjusted))

6.b

# Pair the two differenced log10 series row by row: x is the SP series and
# y is the IBM series.
Mdata <- data.frame(x = lgSP, y = lgIBM)
# library() fails loudly if ggplot2 is missing; require() would only return
# FALSE.
library(ggplot2)
# Scatter plot of y against x. qplot() is deprecated in current ggplot2, so the
# plot is built explicitly; mapping the columns through aes() also labels the
# axes "x" and "y" instead of "Mdata$x" and "Mdata$y".
ggplot(Mdata, aes(x = x, y = y)) +
  geom_point()

## 6.c

require(MASS)
## Loading required package: MASS
# m1: least-squares regression of y on x with an intercept.
# The formula names the columns directly and lets `data = Mdata` resolve them.
# Writing Mdata$y ~ Mdata$x would bypass the data argument, label the
# coefficient "Mdata$x", and make predict(m1, newdata = ...) ignore newdata.
# The estimates are unchanged; only the labels in the summary differ.
m1 <- lm(y ~ x, data = Mdata)
summary(m1)
## 
## Call:
## lm(formula = y ~ x, data = Mdata)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.037754 -0.002711  0.000156  0.002952  0.040514 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 7.797e-05  1.018e-04   0.766    0.444    
## x           2.314e-01  1.100e-02  21.045   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.005681 on 3111 degrees of freedom
## Multiple R-squared:  0.1246, Adjusted R-squared:  0.1243 
## F-statistic: 442.9 on 1 and 3111 DF,  p-value: < 2.2e-16
# Akaike information criterion of m1, used to compare the fitted models.
AIC(m1)
## [1] -23353.64
# m2: regression of y on x through the origin (`-1` drops the intercept).
# The formula names the columns directly and lets `data = Mdata` resolve them.
# Writing Mdata$y ~ Mdata$x would bypass the data argument and make
# predict(m2, newdata = ...) ignore newdata. The estimates are unchanged; only
# the labels in the summary differ.
# Note that R-squared for a no-intercept fit is computed about zero, so it is
# not directly comparable with the R-squared of a model with an intercept.
m2 <- lm(y ~ -1 + x, data = Mdata)
summary(m2)
## 
## Call:
## lm(formula = y ~ -1 + x, data = Mdata)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.037676 -0.002632  0.000234  0.003030  0.040590 
## 
## Coefficients:
##   Estimate Std. Error t value Pr(>|t|)    
## x   0.2315     0.0110   21.05   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.005681 on 3112 degrees of freedom
## Multiple R-squared:  0.1247, Adjusted R-squared:  0.1244 
## F-statistic: 443.2 on 1 and 3112 DF,  p-value: < 2.2e-16
# Akaike information criterion of m2, used to compare the fitted models.
AIC(m2)
## [1] -23355.05
# Build the extra regressors for the models fitted on `xx`, from the market
# series Mdata$x (the `sp` column):
#   idx : row positions where the market series is <= 0
#   c1  : 1 on those rows, 0 elsewhere
#   nsp : the market value on those rows, 0 elsewhere
# These were previously derived from Mdata$y, i.e. from the IBM response
# itself, so the models regressed ibm on a truncated copy of ibm.
# NOTE(review): the lm()/AIC output recorded further down for m3, m4 and m5
# (and the final model choice) was produced with the old IBM-based columns and
# must be regenerated.
idx <- which(Mdata$x <= 0)
nsp <- rep(0, length(Mdata$x))
nsp[idx] <- Mdata$x[idx]
c1 <- rep(0, length(Mdata$x))
c1[idx] <- 1
xx <- data.frame(ibm = Mdata$y, sp = Mdata$x, c1, nsp)
head(xx)
##            ibm           sp c1          nsp
## 1  0.004619071 -0.022663940  1 -0.022663940
## 2 -0.003949611 -0.011323076  1 -0.011323076
## 3  0.006548176  0.010409710  0  0.000000000
## 4  0.005107456  0.008039575  0  0.000000000
## 5 -0.005151428 -0.007354365  1 -0.007354365
## 6 -0.001055229 -0.003551851  1 -0.003551851
# m3: regress ibm on c1 and sp (with intercept) using the columns of xx, then
# print the fit summary.
m3 <-lm(ibm ~ c1 + sp, xx)
summary(m3)
## 
## Call:
## lm(formula = ibm ~ c1 + sp, data = xx)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.033694 -0.002193  0.000097  0.002365  0.039796 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.0038645  0.0001082   35.73   <2e-16 ***
## c1          -0.0077809  0.0001576  -49.37   <2e-16 ***
## sp           0.1260899  0.0085083   14.82   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.004255 on 3110 degrees of freedom
## Multiple R-squared:  0.5092, Adjusted R-squared:  0.5089 
## F-statistic:  1613 on 2 and 3110 DF,  p-value: < 2.2e-16
# Akaike information criterion of m3, used to compare the fitted models.
AIC(m3)
## [1] -25152.93
# m4: regress ibm on nsp and sp (with intercept) using the columns of xx, then
# print the fit summary.
m4 <-lm(ibm ~ nsp + sp, xx)
summary(m4)
## 
## Call:
## lm(formula = ibm ~ nsp + sp, data = xx)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.005498 -0.002023 -0.000993  0.000962  0.041940 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.609e-03  6.868e-05   37.99   <2e-16 ***
## nsp         1.234e+00  1.618e-02   76.25   <2e-16 ***
## sp          9.560e-02  6.734e-03   14.20   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.003354 on 3110 degrees of freedom
## Multiple R-squared:  0.6949, Adjusted R-squared:  0.6947 
## F-statistic:  3542 on 2 and 3110 DF,  p-value: < 2.2e-16
# Akaike information criterion of m4, used to compare the fitted models.
AIC(m4)
## [1] -26633.11
# m5: regress ibm on sp, c1 and nsp together (with intercept) using the
# columns of xx, then print the fit summary.
m5 <-lm(ibm ~ sp + c1 + nsp, xx)
summary(m5)
## 
## Call:
## lm(formula = ibm ~ sp + c1 + nsp, data = xx)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.005336 -0.001281 -0.000143  0.000469  0.041269 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.992e-03  7.438e-05   53.67   <2e-16 ***
## sp           7.115e-02  5.922e-03   12.02   <2e-16 ***
## c1          -3.958e-03  1.263e-04  -31.34   <2e-16 ***
## nsp          9.689e-01  1.644e-02   58.94   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002925 on 3109 degrees of freedom
## Multiple R-squared:  0.7682, Adjusted R-squared:  0.768 
## F-statistic:  3434 on 3 and 3109 DF,  p-value: < 2.2e-16
# Akaike information criterion of m5, used to compare the fitted models.
AIC(m5)
## [1] -27485.93

Model m5, lm(ibm ~ sp + c1 + nsp), has the smallest AIC (-27485.93) of the five fitted models and is therefore the preferred model.