# INITIAL ANALYSIS FOR THE FINAL CAPSTONE PROJECT
# Load the Apple stock dataset from the working directory and print a
# per-column summary.
# stringsAsFactors = TRUE is set explicitly: R >= 4.0.0 no longer converts
# strings to factors by default, and the summary recorded below (Date shown
# as per-level counts) reflects Date having been read as a factor.
stock <- read.csv("apple dataset.csv", stringsAsFactors = TRUE)
summary(stock)
## Date Open High Low
## 1980-12-12: 1 Min. : 11.12 Min. : 11.12 Min. : 11.00
## 1980-12-15: 1 1st Qu.: 27.00 1st Qu.: 27.50 1st Qu.: 26.50
## 1980-12-16: 1 Median : 43.75 Median : 44.50 Median : 43.00
## 1980-12-17: 1 Mean :101.09 Mean :102.32 Mean : 99.75
## 1980-12-18: 1 3rd Qu.:109.70 3rd Qu.:111.17 3rd Qu.:108.37
## 1980-12-19: 1 Max. :702.41 Max. :705.07 Max. :699.57
## (Other) :9376
## Close Volume Dividend Split
## Min. : 11.00 Min. : 4471 Min. :0.000000 Min. :1.000
## 1st Qu.: 27.00 1st Qu.: 1230750 1st Qu.:0.000000 1st Qu.:1.000
## Median : 43.75 Median : 3759200 Median :0.000000 Median :1.000
## Mean :101.05 Mean : 11962851 Mean :0.003774 Mean :1.001
## 3rd Qu.:109.78 3rd Qu.: 17908300 3rd Qu.:0.000000 3rd Qu.:1.000
## Max. :702.10 Max. :189560600 Max. :3.290000 Max. :7.000
##
## Adj_Open Adj_High Adj_Low
## Min. : 0.1623 Min. : 0.1623 Min. : 0.1605
## 1st Qu.: 0.9157 1st Qu.: 0.9329 1st Qu.: 0.8966
## Median : 1.4256 Median : 1.4539 Median : 1.3969
## Mean : 21.1167 Mean : 21.3165 Mean : 20.9023
## 3rd Qu.: 19.6046 3rd Qu.: 19.9391 3rd Qu.: 19.2787
## Max. :179.1000 Max. :180.4800 Max. :178.1600
##
## Adj_Close Adj_Volume
## Min. : 0.1605 Min. :2.504e+05
## 1st Qu.: 0.9151 1st Qu.:3.475e+07
## Median : 1.4248 Median :6.080e+07
## Mean : 21.1149 Mean :8.873e+07
## 3rd Qu.: 19.6084 3rd Qu.:1.111e+08
## Max. :178.9700 Max. :1.855e+09
##
# Inspect the data: open the data viewer, expose the columns by bare name,
# and print descriptive statistics for every column.
# View() opens a GUI data viewer, so it is only called in interactive
# sessions; batch runs (Rscript, knitting) skip it.
if (interactive()) {
  View(stock)
}
# Puts the columns of `stock` on the search path so they can be referenced
# without the `stock$` prefix.
attach(stock)
library(psych)
describe(stock)
## vars n mean sd median trimmed
## Date* 1 9382 4691.50 2708.49 4691.50 4691.50
## Open 2 9382 101.09 135.26 43.75 65.94
## High 3 9382 102.32 136.38 44.50 66.93
## Low 4 9382 99.75 133.92 43.00 64.90
## Close 5 9382 101.05 135.18 43.75 65.93
## Volume 6 9382 11962850.54 16636897.77 3759200.00 8522664.50
## Dividend 7 9382 0.00 0.09 0.00 0.00
## Split 8 9382 1.00 0.06 1.00 1.00
## Adj_Open 9 9382 21.12 38.44 1.43 11.41
## Adj_High 10 9382 21.32 38.75 1.45 11.54
## Adj_Low 11 9382 20.90 38.12 1.40 11.26
## Adj_Close 12 9382 21.11 38.45 1.42 11.40
## Adj_Volume 13 9382 88725799.58 87086110.15 60796366.50 72971312.39
## mad min max range skew kurtosis
## Date* 3477.44 1.00 9.382000e+03 9.381000e+03 0.00 -1.20
## Open 32.81 11.12 7.024100e+02 6.912900e+02 2.42 5.23
## High 33.27 11.12 7.050700e+02 6.939500e+02 2.42 5.22
## Low 32.25 11.00 6.995700e+02 6.885700e+02 2.42 5.24
## Close 32.81 11.00 7.021000e+02 6.911000e+02 2.42 5.23
## Volume 4731940.29 4471.00 1.895606e+08 1.895561e+08 2.34 8.13
## Dividend 0.00 0.00 3.290000e+00 3.290000e+00 31.44 1040.79
## Split 0.00 1.00 7.000000e+00 6.000000e+00 87.07 8006.29
## Adj_Open 1.47 0.16 1.791000e+02 1.789400e+02 2.07 3.45
## Adj_High 1.50 0.16 1.804800e+02 1.803200e+02 2.07 3.44
## Adj_Low 1.44 0.16 1.781600e+02 1.780000e+02 2.08 3.48
## Adj_Close 1.47 0.16 1.789700e+02 1.788100e+02 2.08 3.46
## Adj_Volume 47243592.37 250376.00 1.855410e+09 1.855160e+09 3.42 28.71
## se
## Date* 27.96
## Open 1.40
## High 1.41
## Low 1.38
## Close 1.40
## Volume 171761.03
## Dividend 0.00
## Split 0.00
## Adj_Open 0.40
## Adj_High 0.40
## Adj_Low 0.39
## Adj_Close 0.40
## Adj_Volume 899085.88
# Frequency table of the distinct Dividend values, then display it.
mytable <- table(Dividend = stock$Dividend)
print(mytable)
## Dividend
## 0 0.08 0.1 0.11 0.12 0.47 0.52 0.57 0.63 2.65 3.05 3.29
## 9326 4 4 4 21 3 4 4 4 3 4 1
# Frequency table of the distinct Split values, then display it.
mytable1 <- table(Split = stock$Split)
print(mytable1)
## Split
## 1 2 7
## 9378 3 1
# `stock` is already attached earlier in this script. Attaching it a second
# time only placed a duplicate copy on the search path and triggered the
# "objects are masked from stock" message, so the repeated attach() call
# (and its message) has been removed.
library(lattice)
# Univariate views of the raw price and volume columns, followed by a
# scatter plot of Close and Date.
# Every call takes its columns from `stock` explicitly instead of relying on
# the attached search path, so a masking object cannot change the result.
histogram(~Open, data = stock)
histogram(~Close, data = stock)
boxplot(stock$Open)
boxplot(stock$Close)
boxplot(stock$High)
boxplot(stock$Low)
histogram(~Volume, data = stock)
plot(
  ~ Close + Date,
  data = stock,
  main = "Closing price of stock with day number",
  pch = 1
)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Pairwise scatter plots of the raw price and volume columns; the columns are
# read from `stock` explicitly rather than from the attached search path.
scatterplotMatrix(~ Close + Open + High + Low + Volume, data = stock, cex = 0.8)
# Correlation matrix of the raw and adjusted price/volume columns. The columns
# are selected by name (they sit at positions 2:6 and 9:13 of `stock`) so the
# result does not silently change if the column order does.
cor(stock[, c(
  "Open", "High", "Low", "Close", "Volume",
  "Adj_Open", "Adj_High", "Adj_Low", "Adj_Close", "Adj_Volume"
)])
## Open High Low Close Volume Adj_Open
## Open 1.0000000 0.9999281 0.9999002 0.9998257 0.2882580 0.53307853
## High 0.9999281 1.0000000 0.9998790 0.9999201 0.2895081 0.53210141
## Low 0.9999002 0.9998790 1.0000000 0.9999135 0.2860077 0.53424426
## Close 0.9998257 0.9999201 0.9999135 1.0000000 0.2878256 0.53316338
## Volume 0.2882580 0.2895081 0.2860077 0.2878256 1.0000000 0.61196816
## Adj_Open 0.5330785 0.5321014 0.5342443 0.5331634 0.6119682 1.00000000
## Adj_High 0.5334196 0.5325140 0.5346118 0.5335876 0.6139130 0.99995700
## Adj_Low 0.5320958 0.5311451 0.5333616 0.5322755 0.6090673 0.99994498
## Adj_Close 0.5326582 0.5317641 0.5339189 0.5329171 0.6113204 0.99990481
## Adj_Volume 0.1916405 0.1944722 0.1874925 0.1911254 0.5403455 -0.05206478
## Adj_High Adj_Low Adj_Close Adj_Volume
## Open 0.53341960 0.53209578 0.53265821 0.19164055
## High 0.53251400 0.53114505 0.53176410 0.19447224
## Low 0.53461177 0.53336156 0.53391886 0.18749247
## Close 0.53358757 0.53227548 0.53291712 0.19112537
## Volume 0.61391304 0.60906726 0.61132042 0.54034546
## Adj_Open 0.99995700 0.99994498 0.99990481 -0.05206478
## Adj_High 1.00000000 0.99993011 0.99995486 -0.05071799
## Adj_Low 0.99993011 1.00000000 0.99995482 -0.05426231
## Adj_Close 0.99995486 0.99995482 1.00000000 -0.05249642
## Adj_Volume -0.05071799 -0.05426231 -0.05249642 1.00000000
library(corrgram)
# Corrgram of the raw and adjusted price/volume columns (positions 2:6 and
# 9:13 of `stock`, selected by name): shaded cells in the lower panel, pie
# glyphs in the upper panel. `order = TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
corrgram(
  stock[, c(
    "Open", "High", "Low", "Close", "Volume",
    "Adj_Open", "Adj_High", "Adj_Low", "Adj_Close", "Adj_Volume"
  )],
  order = TRUE,
  text.panel = panel.txt,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  main = "Corrgram of all variables"
)
# Welch two-sample t-test comparing the mean of Open with the mean of Close.
# Evaluated inside `stock` so it does not depend on attach().
# NOTE(review): Open and Close come from the same rows (one per Date), so a
# paired test may be more appropriate -- confirm.
with(stock, t.test(Open, Close))
##
## Welch Two Sample t-test
##
## data: Open and Close
## t = 0.018981, df = 18762, p-value = 0.9849
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.832215 3.907160
## sample estimates:
## mean of x mean of y
## 101.0878 101.0503
# Welch two-sample t-test comparing the mean of Low with the mean of Close.
# Evaluated inside `stock` so it does not depend on attach().
# NOTE(review): Low and Close come from the same rows (one per Date), so a
# paired test may be more appropriate -- confirm.
with(stock, t.test(Low, Close))
##
## Welch Two Sample t-test
##
## data: Low and Close
## t = -0.66079, df = 18760, p-value = 0.5088
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.148646 2.552448
## sample estimates:
## mean of x mean of y
## 99.75224 101.05034
# Welch two-sample t-test comparing the mean of High with the mean of Close.
# Evaluated inside `stock` so it does not depend on attach().
# NOTE(review): High and Close come from the same rows (one per Date), so a
# paired test may be more appropriate -- confirm.
with(stock, t.test(High, Close))
##
## Welch Two Sample t-test
##
## data: High and Close
## t = 0.6408, df = 18761, p-value = 0.5217
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.615459 5.156211
## sample estimates:
## mean of x mean of y
## 102.3207 101.0503
# Welch two-sample t-test comparing the mean of Volume with the mean of Close.
# Evaluated inside `stock` so it does not depend on attach().
# NOTE(review): the recorded means (~1.2e7 vs ~101) suggest the two columns
# are on very different scales, so a significant mean difference is expected
# -- confirm this comparison is intended.
with(stock, t.test(Volume, Close))
##
## Welch Two Sample t-test
##
## data: Volume and Close
## t = 69.648, df = 9381, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 11626061 12299438
## sample estimates:
## mean of x mean of y
## 1.196285e+07 1.010503e+02
# Welch two-sample t-test comparing the mean of Adj_High with the mean of
# Close. Evaluated inside `stock` so it does not depend on attach().
# NOTE(review): this compares an adjusted column with the unadjusted Close;
# Adj_Close may be the intended counterpart -- confirm.
with(stock, t.test(Adj_High, Close))
##
## Welch Two Sample t-test
##
## data: Adj_High and Close
## t = -54.921, df = 10912, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -82.57958 -76.88808
## sample estimates:
## mean of x mean of y
## 21.31651 101.05034
# Welch two-sample t-test comparing the mean of Adj_Low with the mean of
# Close. Evaluated inside `stock` so it does not depend on attach().
# NOTE(review): this compares an adjusted column with the unadjusted Close;
# Adj_Close may be the intended counterpart -- confirm.
with(stock, t.test(Adj_Low, Close))
##
## Welch Two Sample t-test
##
## data: Adj_Low and Close
## t = -55.274, df = 10864, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -82.99038 -77.30578
## sample estimates:
## mean of x mean of y
## 20.90226 101.05034
# Welch two-sample t-test comparing the mean of Adj_Open with the mean of
# Close. Evaluated inside `stock` so it does not depend on attach().
# NOTE(review): this compares an adjusted column with the unadjusted Close;
# Adj_Close may be the intended counterpart -- confirm.
with(stock, t.test(Adj_Open, Close))
##
## Welch Two Sample t-test
##
## data: Adj_Open and Close
## t = -55.092, df = 10888, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -82.77773 -77.08962
## sample estimates:
## mean of x mean of y
## 21.11667 101.05034
# Welch two-sample t-test comparing the mean of Adj_Volume with the mean of
# Close. Evaluated inside `stock` so it does not depend on attach().
# NOTE(review): the recorded means (~8.9e7 vs ~101) suggest the two columns
# are on very different scales, so a significant mean difference is expected
# -- confirm this comparison is intended.
with(stock, t.test(Adj_Volume, Close))
##
## Welch Two Sample t-test
##
## data: Adj_Volume and Close
## t = 98.684, df = 9381, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 86963295 90488102
## sample estimates:
## mean of x mean of y
## 8.872580e+07 1.010503e+02
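# From the t-tests performed above, the means of Adjusted High, Adjusted Low,
# Adjusted Open, Volume and Adjusted Volume differ significantly from the mean
# of Close (p < 2.2e-16), whereas the means of Open, High and Low do not
# (p > 0.5). On this basis it is inferred that the pricing of the stock
# primarily depends on Adjusted High, Adjusted Low, Adjusted Open, Volume and
# Adjusted Volume. Note that a difference in means does not by itself
# establish dependence; the correlation matrix above is the more direct
# evidence of how these variables relate to Close.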