CAPSTONE PROJECT ON STOCK PRICE PREDICTION FOR APPLE INC. (LISTED ON NASDAQ), BASED ON HISTORICAL STOCK MARKET DATA FROM 1980 TO 2018

# Load the Apple historical price data from the working directory.
# The file name is passed directly: wrapping a single literal in
# paste(..., sep = "") did nothing.
stock <- read.csv("apple dataset.csv")
# Per-column summary (quartiles, mean, range) of the loaded data.
summary(stock)
##          Date           Open             High             Low        
##  1980-12-12:   1   Min.   : 11.12   Min.   : 11.12   Min.   : 11.00  
##  1980-12-15:   1   1st Qu.: 27.00   1st Qu.: 27.50   1st Qu.: 26.50  
##  1980-12-16:   1   Median : 43.75   Median : 44.50   Median : 43.00  
##  1980-12-17:   1   Mean   :101.09   Mean   :102.32   Mean   : 99.75  
##  1980-12-18:   1   3rd Qu.:109.70   3rd Qu.:111.17   3rd Qu.:108.37  
##  1980-12-19:   1   Max.   :702.41   Max.   :705.07   Max.   :699.57  
##  (Other)   :9376                                                     
##      Close            Volume             Dividend            Split      
##  Min.   : 11.00   Min.   :     4471   Min.   :0.000000   Min.   :1.000  
##  1st Qu.: 27.00   1st Qu.:  1230750   1st Qu.:0.000000   1st Qu.:1.000  
##  Median : 43.75   Median :  3759200   Median :0.000000   Median :1.000  
##  Mean   :101.05   Mean   : 11962851   Mean   :0.003774   Mean   :1.001  
##  3rd Qu.:109.78   3rd Qu.: 17908300   3rd Qu.:0.000000   3rd Qu.:1.000  
##  Max.   :702.10   Max.   :189560600   Max.   :3.290000   Max.   :7.000  
##                                                                         
##     Adj_Open           Adj_High           Adj_Low        
##  Min.   :  0.1623   Min.   :  0.1623   Min.   :  0.1605  
##  1st Qu.:  0.9157   1st Qu.:  0.9329   1st Qu.:  0.8966  
##  Median :  1.4256   Median :  1.4539   Median :  1.3969  
##  Mean   : 21.1167   Mean   : 21.3165   Mean   : 20.9023  
##  3rd Qu.: 19.6046   3rd Qu.: 19.9391   3rd Qu.: 19.2787  
##  Max.   :179.1000   Max.   :180.4800   Max.   :178.1600  
##                                                          
##    Adj_Close          Adj_Volume       
##  Min.   :  0.1605   Min.   :2.504e+05  
##  1st Qu.:  0.9151   1st Qu.:3.475e+07  
##  Median :  1.4248   Median :6.080e+07  
##  Mean   : 21.1149   Mean   :8.873e+07  
##  3rd Qu.: 19.6084   3rd Qu.:1.111e+08  
##  Max.   :178.9700   Max.   :1.855e+09  
## 
# Open the data viewer only in interactive sessions; View() needs a GUI
# data viewer and may fail when the script is run non-interactively.
if (interactive()) {
  View(stock)
}
# NOTE(review): attach() puts the columns of `stock` on the search path so
# they can be referenced by bare name. Prefer with(stock, ...) or a
# `data = stock` argument in new code.
attach(stock)
library(psych)
# Descriptive statistics (n, mean, sd, median, skew, kurtosis, ...) per column.
describe(stock)
##            vars    n        mean          sd      median     trimmed
## Date*         1 9382     4691.50     2708.49     4691.50     4691.50
## Open          2 9382      101.09      135.26       43.75       65.94
## High          3 9382      102.32      136.38       44.50       66.93
## Low           4 9382       99.75      133.92       43.00       64.90
## Close         5 9382      101.05      135.18       43.75       65.93
## Volume        6 9382 11962850.54 16636897.77  3759200.00  8522664.50
## Dividend      7 9382        0.00        0.09        0.00        0.00
## Split         8 9382        1.00        0.06        1.00        1.00
## Adj_Open      9 9382       21.12       38.44        1.43       11.41
## Adj_High     10 9382       21.32       38.75        1.45       11.54
## Adj_Low      11 9382       20.90       38.12        1.40       11.26
## Adj_Close    12 9382       21.11       38.45        1.42       11.40
## Adj_Volume   13 9382 88725799.58 87086110.15 60796366.50 72971312.39
##                    mad       min          max        range  skew kurtosis
## Date*          3477.44      1.00 9.382000e+03 9.381000e+03  0.00    -1.20
## Open             32.81     11.12 7.024100e+02 6.912900e+02  2.42     5.23
## High             33.27     11.12 7.050700e+02 6.939500e+02  2.42     5.22
## Low              32.25     11.00 6.995700e+02 6.885700e+02  2.42     5.24
## Close            32.81     11.00 7.021000e+02 6.911000e+02  2.42     5.23
## Volume      4731940.29   4471.00 1.895606e+08 1.895561e+08  2.34     8.13
## Dividend          0.00      0.00 3.290000e+00 3.290000e+00 31.44  1040.79
## Split             0.00      1.00 7.000000e+00 6.000000e+00 87.07  8006.29
## Adj_Open          1.47      0.16 1.791000e+02 1.789400e+02  2.07     3.45
## Adj_High          1.50      0.16 1.804800e+02 1.803200e+02  2.07     3.44
## Adj_Low           1.44      0.16 1.781600e+02 1.780000e+02  2.08     3.48
## Adj_Close         1.47      0.16 1.789700e+02 1.788100e+02  2.08     3.46
## Adj_Volume 47243592.37 250376.00 1.855410e+09 1.855160e+09  3.42    28.71
##                   se
## Date*          27.96
## Open            1.40
## High            1.41
## Low             1.38
## Close           1.40
## Volume     171761.03
## Dividend        0.00
## Split           0.00
## Adj_Open        0.40
## Adj_High        0.40
## Adj_Low         0.39
## Adj_Close       0.40
## Adj_Volume 899085.88
# Count how many rows carry each distinct dividend amount.
mytable <- table(Dividend = stock$Dividend)
print(mytable)
## Dividend
##    0 0.08  0.1 0.11 0.12 0.47 0.52 0.57 0.63 2.65 3.05 3.29 
## 9326    4    4    4   21    3    4    4    4    3    4    1
# Count how many rows carry each distinct split ratio.
mytable1 <- table(Split = stock$Split)
print(mytable1)
## Split
##    1    2    7 
## 9378    3    1
# Exploratory plots. Columns are taken from `stock` explicitly, so the data
# frame does not need to be attached a second time (which only produced
# "objects are masked" messages).
library(lattice)
# Distributions of the opening and closing prices.
histogram(~Open, data = stock)

histogram(~Close, data = stock)

# Box plots of the four daily price columns.
boxplot(stock$Open)

boxplot(stock$Close)

boxplot(stock$High)

boxplot(stock$Low)

# Distribution of the traded volume.
histogram(~Volume, data = stock)

# Closing price over time: the date goes on the x-axis and the price on the
# y-axis. Date values look like "1980-12-12", which as.Date() parses directly.
plot(Close ~ as.Date(Date), data = stock, xlab = "Date",
     main = "Closing price of stock with day number", pch = 1)

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Pairwise scatterplots of the unadjusted price columns and volume; the
# columns are looked up in `stock` explicitly rather than on the search path.
scatterplotMatrix(~ Close + Open + High + Low + Volume, data = stock,
                  cex = 0.8)

# Correlation matrix of the raw and adjusted price/volume columns.
# Columns are selected by name (same columns and order as positions 2:6 and
# 9:13) so the result does not depend on the column layout of the CSV.
cor(stock[, c("Open", "High", "Low", "Close", "Volume",
              "Adj_Open", "Adj_High", "Adj_Low", "Adj_Close", "Adj_Volume")])
##                 Open      High       Low     Close    Volume    Adj_Open
## Open       1.0000000 0.9999281 0.9999002 0.9998257 0.2882580  0.53307853
## High       0.9999281 1.0000000 0.9998790 0.9999201 0.2895081  0.53210141
## Low        0.9999002 0.9998790 1.0000000 0.9999135 0.2860077  0.53424426
## Close      0.9998257 0.9999201 0.9999135 1.0000000 0.2878256  0.53316338
## Volume     0.2882580 0.2895081 0.2860077 0.2878256 1.0000000  0.61196816
## Adj_Open   0.5330785 0.5321014 0.5342443 0.5331634 0.6119682  1.00000000
## Adj_High   0.5334196 0.5325140 0.5346118 0.5335876 0.6139130  0.99995700
## Adj_Low    0.5320958 0.5311451 0.5333616 0.5322755 0.6090673  0.99994498
## Adj_Close  0.5326582 0.5317641 0.5339189 0.5329171 0.6113204  0.99990481
## Adj_Volume 0.1916405 0.1944722 0.1874925 0.1911254 0.5403455 -0.05206478
##               Adj_High     Adj_Low   Adj_Close  Adj_Volume
## Open        0.53341960  0.53209578  0.53265821  0.19164055
## High        0.53251400  0.53114505  0.53176410  0.19447224
## Low         0.53461177  0.53336156  0.53391886  0.18749247
## Close       0.53358757  0.53227548  0.53291712  0.19112537
## Volume      0.61391304  0.60906726  0.61132042  0.54034546
## Adj_Open    0.99995700  0.99994498  0.99990481 -0.05206478
## Adj_High    1.00000000  0.99993011  0.99995486 -0.05071799
## Adj_Low     0.99993011  1.00000000  0.99995482 -0.05426231
## Adj_Close   0.99995486  0.99995482  1.00000000 -0.05249642
## Adj_Volume -0.05071799 -0.05426231 -0.05249642  1.00000000
library(corrgram)
# Corrgram of the raw and adjusted price/volume columns: shaded cells below
# the diagonal, pie glyphs above it, variable names on the diagonal.
# TRUE is spelled out (T can be reassigned) and columns are chosen by name.
corrgram(
  stock[, c("Open", "High", "Low", "Close", "Volume",
            "Adj_Open", "Adj_High", "Adj_Low", "Adj_Close", "Adj_Volume")],
  order = TRUE,
  text.panel = panel.txt,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  main = "Corrgram of all variables"
)

# Welch two-sample t-test: mean of Open vs. mean of Close. The columns are
# resolved inside `stock` instead of relying on attached globals.
# NOTE(review): the two columns are same-day observations; confirm whether a
# paired test was intended.
with(stock, t.test(Open, Close))
## 
##  Welch Two Sample t-test
## 
## data:  Open and Close
## t = 0.018981, df = 18762, p-value = 0.9849
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -3.832215  3.907160
## sample estimates:
## mean of x mean of y 
##  101.0878  101.0503
# Welch two-sample t-test: mean of Low vs. mean of Close, with the columns
# resolved inside `stock` instead of relying on attached globals.
with(stock, t.test(Low, Close))
## 
##  Welch Two Sample t-test
## 
## data:  Low and Close
## t = -0.66079, df = 18760, p-value = 0.5088
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.148646  2.552448
## sample estimates:
## mean of x mean of y 
##  99.75224 101.05034
# Welch two-sample t-test: mean of High vs. mean of Close, with the columns
# resolved inside `stock` instead of relying on attached globals.
with(stock, t.test(High, Close))
## 
##  Welch Two Sample t-test
## 
## data:  High and Close
## t = 0.6408, df = 18761, p-value = 0.5217
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.615459  5.156211
## sample estimates:
## mean of x mean of y 
##  102.3207  101.0503
# Welch two-sample t-test: mean of Volume vs. mean of Close, with the columns
# resolved inside `stock` instead of relying on attached globals.
# NOTE(review): Volume and Close are different kinds of quantity (count vs.
# price), so a difference in means is expected; confirm this test is wanted.
with(stock, t.test(Volume, Close))
## 
##  Welch Two Sample t-test
## 
## data:  Volume and Close
## t = 69.648, df = 9381, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  11626061 12299438
## sample estimates:
##    mean of x    mean of y 
## 1.196285e+07 1.010503e+02
# Welch two-sample t-test: mean of Adj_High vs. mean of Close, with the
# columns resolved inside `stock` instead of relying on attached globals.
with(stock, t.test(Adj_High, Close))
## 
##  Welch Two Sample t-test
## 
## data:  Adj_High and Close
## t = -54.921, df = 10912, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -82.57958 -76.88808
## sample estimates:
## mean of x mean of y 
##  21.31651 101.05034
# Welch two-sample t-test: mean of Adj_Low vs. mean of Close, with the
# columns resolved inside `stock` instead of relying on attached globals.
with(stock, t.test(Adj_Low, Close))
## 
##  Welch Two Sample t-test
## 
## data:  Adj_Low and Close
## t = -55.274, df = 10864, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -82.99038 -77.30578
## sample estimates:
## mean of x mean of y 
##  20.90226 101.05034
# Welch two-sample t-test: mean of Adj_Open vs. mean of Close, with the
# columns resolved inside `stock` instead of relying on attached globals.
with(stock, t.test(Adj_Open, Close))
## 
##  Welch Two Sample t-test
## 
## data:  Adj_Open and Close
## t = -55.092, df = 10888, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -82.77773 -77.08962
## sample estimates:
## mean of x mean of y 
##  21.11667 101.05034
# Welch two-sample t-test: mean of Adj_Volume vs. mean of Close, with the
# columns resolved inside `stock` instead of relying on attached globals.
with(stock, t.test(Adj_Volume, Close))
## 
##  Welch Two Sample t-test
## 
## data:  Adj_Volume and Close
## t = 98.684, df = 9381, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  86963295 90488102
## sample estimates:
##    mean of x    mean of y 
## 8.872580e+07 1.010503e+02

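FROM THE T-TESTS PERFORMED ABOVE, IT IS INFERRED THAT THE CLOSING PRICE OF THE STOCK PRIMARILY DEPENDS ON ADJUSTED HIGH, ADJUSTED LOW, ADJUSTED OPEN, VOLUME, AND ADJUSTED VOLUME, SINCE THESE ARE THE VARIABLES WHOSE MEANS DIFFER SIGNIFICANTLY FROM THE MEAN OF CLOSE (P-VALUE < 2.2E-16).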

# Linear regression of the closing price on volume and the adjusted price /
# volume columns. `data = stock` ties the fit to the data frame explicitly,
# so it no longer depends on attached columns that a global of the same name
# could mask. The printed "Call:" will now include `data = stock`.
model <- lm(
  Close ~ Volume + Adj_High + Adj_Open + Adj_Low + Adj_Volume,
  data = stock
)
# Coefficient table, residual summary and goodness-of-fit statistics.
summary(model)
## 
## Call:
## lm(formula = Close ~ Volume + Adj_High + Adj_Open + Adj_Low + 
##     Adj_Volume)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -859.61  -34.41  -10.76   11.93  429.87 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.357e+01  1.634e+00   8.307   <2e-16 ***
## Volume      -6.064e-06  1.264e-07 -47.963   <2e-16 ***
## Adj_High     7.037e+01  3.274e+00  21.493   <2e-16 ***
## Adj_Open     6.232e+00  3.371e+00   1.849   0.0645 .  
## Adj_Low     -7.418e+01  3.017e+00 -24.590   <2e-16 ***
## Adj_Volume   8.915e-07  1.716e-08  51.944   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 98.84 on 9376 degrees of freedom
## Multiple R-squared:  0.4657, Adjusted R-squared:  0.4654 
## F-statistic:  1634 on 5 and 9376 DF,  p-value: < 2.2e-16
# Draw the diagnostic plots for the fitted linear model.
plot(x = model)