Project 2_3 (Dataset of stock market in India)

Original Data Reference: https://www.kaggle.com/souravroy1/stock-market-data/download

Let’s load the required libraries in R for data analysis
library(dplyr)
library(downloader)
library(stringr)
library(htmlTable)
library(tidyverse)
library(reshape2)


Download and read the CSV file to a dataframe object
View the data
# Import the data from the csv file

stock_market_data <- read.csv("https://raw.githubusercontent.com/baruab/msdsrepo/main/DATA-607/Stock_Market_data.csv", stringsAsFactors = FALSE)

head(stock_market_data, 5) 
##        Share Category      Sector   RM   Up Last.Traded.Price Percentage.Change
## 1  20MICRONS     Exit   CHEMICALS #N/A #N/A                43              #N/A
## 2 3IINFOTECH     BWLV          IT #N/A #N/A              5.35              #N/A
## 3    3MINDIA     Exit DIVERSIFIED    0    0          12219.65              1.23
## 4    63MOONS       RS          IT #N/A #N/A              75.6              0.26
## 5    8KMILES      HWV          IT    0    0             623.9              0.17
##   High.Price Low.Price X..High.Movt  X..Low.movt Yearly.Gainner X27th.Dec
## 1       #N/A      #N/A         #N/A         #N/A            30%       36%
## 2       #N/A      #N/A         #N/A         #N/A            26%        1%
## 3   12399.95     12115  0.014540381  0.008638052            13%       22%
## 4       76.7      75.6  0.014341591            0                         
## 5        640       624   0.02515625 -0.000160256           -37%       -1%
##   X1st.Feb X1st.March X1st.April TB       RH Corr PeRatio      New.Pe W52_High
## 1      11%        19%        23%       49.40  13%   10.72 16.53846154    49.40
## 2      -3%         5%         6% TB     5.80   8%    6.89           8     7.15
## 3       8%         9%         6% TB 12231.00   0%   66.29          81 15250.00
## 4                                      82.95   9%  213.40         630    96.00
## 5       4%         7%         3% TB   663.80   6%   23.20          18  1119.00
##   Corre latest Annual_Pat Cum.PAT.3.Quarter  Pat.Jump Annual.Growth Year.End
## 1   13%  42775    11.0609           11.7275    0.6666                       
## 2   25%     NA -1059.2700           60.7600 1120.0300             G         
## 3   20%  42775   200.9298          150.3639  -50.5659                       
## 4   21%  42767     0.0000            0.1442    0.1442                       
## 5   44%     NA    53.2361           85.4227   32.1866             G         
##   Mar_17_ReportedPAT Dec_16_ReportedPAT Sep_16_ReportedPAT Jun_16_ReportedPAT
## 1             0.0000             2.2822             4.4844             4.9609
## 2            33.3400            19.0100            21.5800            20.1700
## 3             0.0000            42.4332            48.2087            59.7220
## 4             0.0000             0.1442             0.0000             0.0000
## 5            40.1806            32.6003            28.6688            24.1536
##   Mar_16_ReportedPAT Dec_15_ReportedPAT Y.oY.PAT LQ.0.5.4q.s. PAT.Growth
## 1             2.3595             3.2153        Y                       Y
## 2          -812.2800           -79.0800        Y                       Y
## 3            58.4132            56.6134        Y                       Y
## 4             0.0000           130.8301                                 
## 5            16.3649            15.1859        Y                       Y
##   PeRatio.1 ResultDate Mar_17_Inst_no_of_shares Dec_16_Inst_no_of_shares
## 1     10.72      42775                        3                        2
## 2      6.89      42855                       46                       49
## 3     66.29      42775                       51                       48
## 4    213.40      42767                       14                       15
## 5     23.20      42865                        6                        6
##   Int..Incr..Decr Pledge DividendYield DERatio FB Mar_17_Eps_Before
## 1               1  54.38          0.00    1.70 FB                 0
## 2              -3   0.00          0.00   -0.52 FB              0.27
## 3               3   0.00          0.00    0.01 FB                 0
## 4              -1   0.00          6.61    0.16 FB                 0
## 5               0   8.78          0.00    0.01                17.12
##   Dec_16_Eps_Before Sep_16_Eps_Before Jun_16_Eps_Before Mar_16_Eps_Before
## 1              0.65              1.29              1.41              0.67
## 2              0.17              0.14              0.31            -12.69
## 3             37.67             42.79             53.02             51.85
## 4              0.03                 0                 0                 0
## 5               8.8             20.65             16.89             11.34
##   Dec_15_Eps_Before Annual_Eps      Volume MarketCap Annual_Pat.1 EquityCapital
## 1              0.91       3.11 0.532215284       152      11.0609       17.6433
## 2             -1.32     -16.96  1.16451032       633     -1059.27       1047.45
## 3             50.26     178.36 1.041870422     13766     200.9298       11.2651
## 4             28.39          0        1.25       348            0        9.2157
## 5             10.34      36.34 4.816666174      1904      53.2361       15.2588
##   FaceValue New.Listing PE.High.Price.High Future Cap.Category
## 1         5                             NA             MiniCap
## 2        10                             NA             MiniCap
## 3        10                             NA              MidCap
## 4         2                             NA             MiniCap
## 5         5                             NA            SmallCap

Filter the rows with non-standard Sector values

Clean the data, remove # values

stock_market_df <- stock_market_data%>% filter(str_detect(Sector, "^[A-Z]"))  %>%  select(Share,Sector ,Last.Traded.Price, 
 Percentage.Change,High.Price, Low.Price,Yearly.Gainner,PeRatio,New.Pe, W52_High, MarketCap, DividendYield ) %>% arrange(Sector)

stock_market_df <- sapply(stock_market_df,function(x) {x <- gsub("#N/A" ,"", gsub("#DIV/0!","",x)) })

head(stock_market_df, 5)
##      Share        Sector                          Last.Traded.Price
## [1,] "GLOBALVECT" "AIRLINES"                      "107.4"          
## [2,] "INDIGO"     "AIRLINES"                      "1123.9"         
## [3,] "JETAIRWAYS" "AIRLINES"                      "540.45"         
## [4,] "AMARAJABAT" "AUTO-AUTO ANCL AND COMPONENTS" "899.95"         
## [5,] "AMTEKAUTO"  "AUTO-AUTO ANCL AND COMPONENTS" "39.15"          
##      Percentage.Change High.Price Low.Price Yearly.Gainner PeRatio
## [1,] ""                ""         ""        "45%"          "14.63"
## [2,] "-3.69"           "1130"     "1075"    "27%"          "24.7" 
## [3,] "-1.93"           "546.55"   "525.85"  "-1%"          "8.17" 
## [4,] "1.33"            "918.4"    "905"     "2%"           "31.43"
## [5,] "-1.15"           "39.85"    "38.25"   "12%"          "0"    
##      New.Pe         W52_High MarketCap DividendYield
## [1,] "17"           "137"    "150"     "0"          
## [2,] "20.98394324"  "1168.7" "40975"   "0"          
## [3,] "11"           "669.9"  "6139"    "0"          
## [4,] "34"           "1079.7" "15372"   "0.47"       
## [5,] "-0.910465116" "56.25"  "972"     "0"

Statistical value by Sector

Clean the data

Sorted by number of companies per Sector with their mean Last traded price

stock_market_df <-  as.data.frame(stock_market_df)

stock_market_df$Last.Traded.Price <- as.numeric(stock_market_df$Last.Traded.Price)

summarize_Sector <- stock_market_df %>% group_by( Sector) %>% summarise(Mean_Last_Traded_Price = round(mean(Last.Traded.Price, na.rm=TRUE), 2), Num_Companies = n()) %>% filter(!is.na(mean)) %>% arrange(desc(Num_Companies))

head(summarize_Sector)
## # A tibble: 6 x 3
##   Sector                    Mean_Last_Traded_Price Num_Companies
##   <chr>                                      <dbl>         <int>
## 1 TEXTILES AND APPAREL                        331.           125
## 2 IT                                          322.           113
## 3 METALS AND MINING                           152.           100
## 4 INFRA-MACHINERY EQUIPMENT                   590.            98
## 5 PHARMA                                      608.            88
## 6 REALTY-HOUSEHOLD                            465.            87

Sort the companies by their Market Cap, display their Yearly Gain %, PE Ratio and Divident Yield

Display top 20 companies

stock_market_df$MarketCap <- as.numeric(stock_market_df$MarketCap)

summarize_MarketCap <- head( stock_market_df %>% filter(!is.na(MarketCap)) %>% arrange(desc(MarketCap)), 20) %>% select(Share, Yearly.Gainner,PeRatio,MarketCap, DividendYield )

htmlTable(summarize_MarketCap)
Share Yearly.Gainner PeRatio MarketCap DividendYield
1 TCS 0% 17.44 459592 2.01
2 RELIANCE 31% 14.79 441903 0.8
3 HDFCBANK 38% 27.34 398023 0.7
4 ITC 30% 33.57 332778 3.1
5 HDFC 39% 22.61 249739 1.14
6 ONGC 34% 13.28 241201 4.52
7 SBIN 54% 52.45 234780 0.88
8 INFY -14% 15.09 216751 2.72
9 HINDUNILVR 8% 48.76 215645 1.6
10 IOC 100% 12.55 214364 3.17
11 MARUTI 64% 27.71 203534 1.11
12 ICICIBANK 22% 17.74 173958 0.83
13 COALINDIA 3% 16 172907 9.83
14 KOTAKBANK 30% 34.52 170771 0.06
15 LT 31% 29.69 162319 1.04
16 SUNPHARMA -14% 18.85 154226 0.15
17 BHARTIARTL 6% 31.39 149003 0
18 TATAMOTORS 25% 17.47 146803 0.11
19 NTPC 30% 13.35 134076 2.06
20 AXISBANK 14% 34.26 126132 0.94