Project Title: An analysis of NSE Stock Exchange dataset

NAME: SHIVAPRAKASH.B

EMAIL: shivaprakash.balakrishnan@gmail.com

COLLEGE: NATIONAL INSTITUTE OF TECHNOLOGY, TIRUCHIRAPPALLI

This R Markdown file is about the analysis of the NSE Stock Exchange dataset which shows the market prices of various companies at various equity values and at different time, and on different days.

COLUMN DESCRIPTION Of NSE Dataset:

The data is of National Stock Exchange of India’s stock listings for each trading day of 2016 and 2017.

SYMBOL: Symbol of the listed company. SERIES: Series of the equity. Values are [EQ, BE, BL, BT, GC and IL] OPEN: The opening market price of the equity symbol on the date. HIGH: The highest market price of the equity symbol on the date. LOW: The lowest recorded market price of the equity symbol on the date. CLOSE: The closing recorded price of the equity symbol on the date. LAST: The last traded price of the equity symbol on the date. PREVCLOSE: The previous day closing price of the equity symbol on the date. TOTTRDQTY: Total traded quantity of the equity symbol on the date. TOTTRDVAL: Total traded volume of the equity symbol on the date. TIMESTAMP: Date of record. TOTALTRADES: Total trades executed on the day. ISIN: International Securities Identification Number.

#Q1: Read your dataset in R and visualize the length and breadth of your dataset.

setwd("F:/R-Internship/Course related files")
nse_stocks.df<-read.csv(paste("NSE_Stocks.csv",sep=""))
View(nse_stocks.df)
dim(nse_stocks.df)
## [1] 846404     13
#Q2: Create a descriptive statistics (min, max, median etc) of each variable.

library(psych)
summary(nse_stocks.df)
##         SYMBOL           SERIES            OPEN          
##  SRTRANSFIN:  4709   EQ     :739199   Min.   :     0.05  
##  IDFCBANK  :  4379   BE     : 37428   1st Qu.:    39.90  
##  IRFC      :  4362   SM     : 11229   Median :   139.20  
##  NHAI      :  3360   N6     :  5080   Mean   :   561.26  
##  RECLTD    :  2919   N2     :  4828   3rd Qu.:   490.00  
##  L&TINFRA  :  2760   N4     :  3722   Max.   :119990.00  
##  (Other)   :823915   (Other): 44918                      
##       HIGH                LOW                CLOSE          
##  Min.   :     0.05   Min.   :     0.05   Min.   :     0.05  
##  1st Qu.:    40.85   1st Qu.:    38.85   1st Qu.:    39.75  
##  Median :   142.05   Median :   136.15   Median :   138.90  
##  Mean   :   568.68   Mean   :   553.86   Mean   :   560.82  
##  3rd Qu.:   499.55   3rd Qu.:   481.10   3rd Qu.:   489.55  
##  Max.   :119990.00   Max.   :119990.00   Max.   :119990.00  
##                                                             
##       LAST            PREVCLOSE           TOTTRDQTY        
##  Min.   :     0.0   Min.   :     0.05   Min.   :        1  
##  1st Qu.:    39.6   1st Qu.:    39.70   1st Qu.:     6628  
##  Median :   138.4   Median :   138.75   Median :    44292  
##  Mean   :   560.2   Mean   :   560.23   Mean   :   698380  
##  3rd Qu.:   488.5   3rd Qu.:   489.15   3rd Qu.:   275732  
##  Max.   :119990.0   Max.   :119990.00   Max.   :781836478  
##                                                            
##    TOTTRDVAL              TIMESTAMP       TOTALTRADES     
##  Min.   :0.000e+00   2017-12-26:  1860   Min.   :      1  
##  1st Qu.:6.072e+05   2017-12-29:  1850   1st Qu.:     84  
##  Median :5.321e+06   2017-12-27:  1846   Median :    600  
##  Mean   :1.335e+08   2017-12-22:  1836   Mean   :   5014  
##  3rd Qu.:3.711e+07   2017-11-24:  1830   3rd Qu.:   3028  
##  Max.   :1.426e+11   2017-11-27:  1830   Max.   :1192900  
##                      (Other)   :835352                    
##            ISIN       
##  INE040A01026:   640  
##  INE237A01028:   508  
##  INE043D01016:   505  
##  INE092A01019:   504  
##  INE979R01011:   504  
##  INE095A01012:   501  
##  (Other)     :843242
describe(nse_stocks.df)
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
##             vars      n         mean           sd  min          max
## SYMBOL*        1 846404          NaN           NA  Inf         -Inf
## SERIES*        2 846404          NaN           NA  Inf         -Inf
## OPEN           3 846404       561.26      2005.76 0.05       119990
## HIGH           4 846404       568.68      2027.16 0.05       119990
## LOW            5 846404       553.86      1983.41 0.05       119990
## CLOSE          6 846404       560.82      2004.58 0.05       119990
## LAST           7 846404       560.18      2003.90 0.00       119990
## PREVCLOSE      8 846404       560.23      2000.36 0.05       119990
## TOTTRDQTY      9 846404    698380.05   4055629.69 1.00    781836478
## TOTTRDVAL     10 846404 133459225.96 629696791.82 0.05 142639995864
## TIMESTAMP*    11 846404          NaN           NA  Inf         -Inf
## TOTALTRADES   12 846404      5013.82     14901.12 1.00      1192900
## ISIN*         13 846404          NaN           NA  Inf         -Inf
##                    range        se
## SYMBOL*             -Inf        NA
## SERIES*             -Inf        NA
## OPEN        1.199899e+05      2.18
## HIGH        1.199899e+05      2.20
## LOW         1.199899e+05      2.16
## CLOSE       1.199899e+05      2.18
## LAST        1.199900e+05      2.18
## PREVCLOSE   1.199899e+05      2.17
## TOTTRDQTY   7.818365e+08   4408.28
## TOTTRDVAL   1.426400e+11 684451.42
## TIMESTAMP*          -Inf        NA
## TOTALTRADES 1.192899e+06     16.20
## ISIN*               -Inf        NA
#Q3: Create one-way contingency tables for the categorical variables in your dataset.

with(nse_stocks.df,table(SERIES))
## SERIES
##            BE     BL     BT     BZ     D1     DR     EQ     GB     H6 
##   2457  37428    227      1   2825     48    495 739199   2343      7 
##     HA     HB     HC     HE     IL     IT     IV     MF     N1     N2 
##      1      7     20      1    205    300    299    473   3576   4828 
##     N3     N4     N5     N6     N7     N8     N9     NB     NC     ND 
##   3098   3722   3661   5080   1658   2884   2021   1467   1195   1046 
##     NE     NF     NG     NH     NI     NJ     NK     NL     NM     NN 
##   1602    716    450    506    477    851    471    826    198    383 
##     NO     NP     NQ     NR     NS     NT     NU     NV     NW     NX 
##    544    410     19     86    266    120    341    183    476    375 
##     NY     NZ     P1     P2     Q1     Q2     SM     W2     Y1     Y2 
##    120    452    432    923    298     30  11229    402    310    212 
##     Y3     Y4     Y5     Y6     Y7     Y8     Y9     YA     YB     YC 
##    239     77     77    151    227    257    372    197    230    107 
##     YD     YG 
##     60    130
#Q4: Create two-way contingency tables for the categorical variables in your dataset.

nse_TAB<-nse_stocks.df[c(1:25,2500:2525),]
table(droplevels(nse_TAB$SYMBOL))
## 
##  20MICRONS 3IINFOTECH    3MINDIA    63MOONS    8KMILES   A2ZINFRA 
##          1          1          1          1          1          1 
## AARTIDRUGS   AARTIIND  AARVEEDEN       ABAN        ABB ABBOTINDIA 
##          1          1          1          1          1          1 
##      ABFRL    ABGSHIP ABIRLANUVO  ABMINTLTD        ACC    ACCELYA 
##          1          1          1          1          1          1 
##        ACE   ADANIENT ADANIPORTS ADANIPOWER ADANITRANS   ADFFOODS 
##          1          1          1          1          1          1 
##    ADHUNIK        ITI        IVC        IVP IVRCLINFRA  IVZINGOLD 
##          1          1          1          1          1          1 
## IVZINNIFTY       IZMO    J&KBANK     JAGRAN JAGSNPHARM  JAIBALAJI 
##          1          1          1          1          1          1 
## JAICORPLTD JAINSTUDIO      JALAN  JAMNAAUTO       JASH  JAYAGROGN 
##          1          1          1          1          1          1 
## JAYBARMARU JAYNECOIND JAYSREETEA JBCHEPHARM     JBFIND       JBMA 
##          1          1          1          1          1          1 
##      JCHAC JENSONICOL JETAIRWAYS 
##          1          1          1
table(droplevels(nse_TAB$SERIES))
## 
## BE BZ EQ SM 
##  4  1 44  2
#Q5: Draw a  boxplot of the variables that belong to your study.

nse_TAB3<-nse_stocks.df[(nse_stocks.df$SERIES=="Y2") | 
                          (nse_stocks.df$SERIES=="Y7"),]
nse_TAB3$S1<-droplevels(nse_TAB3$SERIES)

boxplot(HIGH~S1,data=nse_TAB3,xlab="Highest Market Price",ylab="Equity values",main="Variation of Highest Market Price of Y2 and Y7", yaxt="n",horizontal=TRUE)
axis(side=2,at=c(1,2),labels=c("Y2","Y7"))

boxplot(LAST~S1,data=nse_TAB3,xlab="Last Traded Price",
        ylab="Equity Value",main="Variation of Last Traded Price of Y2 and Y7",
        yaxt="n",horizontal=TRUE)
axis(side=2,at=c(1,2),labels=c("Y2","Y7"))

#Q6: Draw Histograms for your suitable data fields.

hist(nse_TAB3$OPEN,xlab="Equity Values",
     ylab="Count",main="Histogram of Opening Market Price",col=c("light blue"))

hist(nse_TAB3$PREVCLOSE,xlab="Closing Market Price on the previous day",
     ylab="Count",main="Histogram of Previous Day's Closing Market Prices",col=c("gold 1"))

#Q7: Draw suitable plot for your data fields.

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
scatterplot(nse_TAB3$LOW,nse_TAB3$TOTTRDQTY,xlab = "Lowest Market Price on a day",
            ylab= "Total Trade Quantity",
            main="Scatterplot of Total Trade Quantity vs Lowest Market Price",
            ylim=c(0,1000))

scatterplot(nse_TAB3$CLOSE,nse_TAB3$TOTTRDQTY,xlab = "Closing Market Price on a               day", ylab= "Total Trade Quantity",main="Scatterplot of Total Trade               Quantity vs Closing Market Price", ylim=c(0,1000))

#Q8: Create a correlation matrix.

library(psych)
nse<-nse_stocks.df[,c(3:10,12)]
corr.test(nse,use="complete")
## Call:corr.test(x = nse, use = "complete")
## Correlation matrix 
##              OPEN  HIGH   LOW CLOSE  LAST PREVCLOSE TOTTRDQTY TOTTRDVAL
## OPEN         1.00  1.00  1.00  1.00  1.00      1.00     -0.03      0.06
## HIGH         1.00  1.00  1.00  1.00  1.00      1.00     -0.03      0.06
## LOW          1.00  1.00  1.00  1.00  1.00      1.00     -0.03      0.06
## CLOSE        1.00  1.00  1.00  1.00  1.00      1.00     -0.03      0.06
## LAST         1.00  1.00  1.00  1.00  1.00      1.00     -0.03      0.06
## PREVCLOSE    1.00  1.00  1.00  1.00  1.00      1.00     -0.03      0.06
## TOTTRDQTY   -0.03 -0.03 -0.03 -0.03 -0.03     -0.03      1.00      0.45
## TOTTRDVAL    0.06  0.06  0.06  0.06  0.06      0.06      0.45      1.00
## TOTALTRADES  0.02  0.02  0.02  0.02  0.02      0.02      0.48      0.86
##             TOTALTRADES
## OPEN               0.02
## HIGH               0.02
## LOW                0.02
## CLOSE              0.02
## LAST               0.02
## PREVCLOSE          0.02
## TOTTRDQTY          0.48
## TOTTRDVAL          0.86
## TOTALTRADES        1.00
## Sample Size 
## [1] 846404
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##             OPEN HIGH LOW CLOSE LAST PREVCLOSE TOTTRDQTY TOTTRDVAL
## OPEN           0    0   0     0    0         0         0         0
## HIGH           0    0   0     0    0         0         0         0
## LOW            0    0   0     0    0         0         0         0
## CLOSE          0    0   0     0    0         0         0         0
## LAST           0    0   0     0    0         0         0         0
## PREVCLOSE      0    0   0     0    0         0         0         0
## TOTTRDQTY      0    0   0     0    0         0         0         0
## TOTTRDVAL      0    0   0     0    0         0         0         0
## TOTALTRADES    0    0   0     0    0         0         0         0
##             TOTALTRADES
## OPEN                  0
## HIGH                  0
## LOW                   0
## CLOSE                 0
## LAST                  0
## PREVCLOSE             0
## TOTTRDQTY             0
## TOTTRDVAL             0
## TOTALTRADES           0
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option
#Q9: Visualize your correlation matrix using corrgram.

nse4<-nse_stocks.df[1:1000,]
library(corrgram)
corrgram(nse4,order=TRUE,lower.panel = panel.shade, upper.panel = panel.pie,
        text.panel = panel.txt,main="Corrgram of nse_stocks intercorrelations")

#Q10: Create a scatter plot matrix for your data set.

library(car)
scatterplotMatrix(nse4[,c("OPEN","TOTTRDVAL","CLOSE","HIGH","LOW","LAST",
                  "TOTTRDQTY")],spread=FALSE,smoother.args = list(lty=2),
                  main="Scatter-plot Matrix of variables of nse_stocks dataset")

#Q11: Run a suitable test to check your hypothesis for your suitable assumptions.

library(MASS)
library(psych)
t.test(nse_stocks.df$OPEN,nse_stocks.df$LAST)
## 
##  Welch Two Sample t-test
## 
## data:  nse_stocks.df$OPEN and nse_stocks.df$LAST
## t = 0.34862, df = 1692800, p-value = 0.7274
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.965846  7.114581
## sample estimates:
## mean of x mean of y 
##  561.2588  560.1844
t.test(nse_stocks.df$LAST,nse_stocks.df$TOTALTRADES)
## 
##  Welch Two Sample t-test
## 
## data:  nse_stocks.df$LAST and nse_stocks.df$TOTALTRADES
## t = -272.52, df = 877010, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4485.670 -4421.608
## sample estimates:
## mean of x mean of y 
##  560.1844 5013.8232
#Q12: Run a t-test to analyse your hypothesis.
library(MASS)
library(psych)
t.test(nse_stocks.df$TOTTRDVAL,nse_stocks.df$OPEN)
## 
##  Welch Two Sample t-test
## 
## data:  nse_stocks.df$TOTTRDVAL and nse_stocks.df$OPEN
## t = 194.99, df = 846400, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  132117163 134800167
## sample estimates:
##    mean of x    mean of y 
## 1.334592e+08 5.612588e+02
t.test(nse_stocks.df$TOTTRDVAL,nse_stocks.df$CLOSE)
## 
##  Welch Two Sample t-test
## 
## data:  nse_stocks.df$TOTTRDVAL and nse_stocks.df$CLOSE
## t = 194.99, df = 846400, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  132117163 134800167
## sample estimates:
##    mean of x    mean of y 
## 1.334592e+08 5.608152e+02