This is a web-scraping exercise that fetches the latest prices of the stocks in the Nifty 50 index.
First, load the rvest package
library(rvest)
Load the URL
# Download and parse the index-composition page; `link` holds the parsed document
link <- read_html(
  "https://www.moneycontrol.com/stocks/marketstats/indexcomp.php?optex=NSE&opttopic=indexcomp&index=9"
)
# Show the page title as a quick check that the expected page was fetched
html_text(html_node(link, "title"))
[1] "Index Composition Nse, Stock/Share Indices, Sensex Indices, Composition Index, Stocks Gain / Lost"
Get the list from the table in the page
# Locate the table node by its absolute XPath and convert it to a data frame.
# NOTE(review): the path is positional (div[3]/div[1]/...), so it presumably
# breaks if the page layout changes — confirm before reuse.
urls <- html_table(
  html_node(
    link,
    xpath = '//*[@id="mc_mainWrapper"]/div[3]/div[1]/div[6]/div[2]/div/table'
  )
)
# Display the scraped table
urls
See the size of the data frame
# Rows x columns of the scraped table (the recorded run gave 50 x 6)
dim(urls)
[1] 50 6
Clean the data in the first (name) column, since it is not readable
# Build a character vector of clean scrip names from the first table column.
# The pattern removes the "list", "Add to Watch" and "Add to Portfolio" text
# together with newlines, tabs and carriage returns found in the scraped cells.
# The column is pulled out with [[ so it is a plain vector for both data frames
# and tibbles, and gsub() cleans every name in one call; an empty table gives
# an empty vector instead of looping over 1:0.
stock_list <- gsub(
  pattern = "list|Add to Watch|Add to Portfolio|\n|\t|\r",
  replacement = "",
  x = as.character(urls[[1]])
)
# Print one line per scrip so each cleaned name can be checked by eye
for (stock_name in stock_list) {
  print(paste0("stock names : ", stock_name))
}
[1] "stock names : Adani Ports"
[1] "stock names : Asian Paints"
[1] "stock names : Axis Bank"
[1] "stock names : Bajaj Auto"
[1] "stock names : Bajaj Finance"
[1] "stock names : Bajaj Finserv"
[1] "stock names : Bharti Airtel"
[1] "stock names : Bharti Infratel"
[1] "stock names : BPCL"
[1] "stock names : Britannia"
[1] "stock names : Cipla"
[1] "stock names : Coal India"
[1] "stock names : Dr Reddys Labs"
[1] "stock names : Eicher Motors"
[1] "stock names : GAIL"
[1] "stock names : Grasim"
[1] "stock names : HCL Tech"
[1] "stock names : HDFC"
[1] "stock names : HDFC Bank"
[1] "stock names : Hero Motocorp"
[1] "stock names : Hindalco"
[1] "stock names : HUL"
[1] "stock names : ICICI Bank"
[1] "stock names : Indiabulls Hsg"
[1] "stock names : IndusInd Bank"
[1] "stock names : Infosys"
[1] "stock names : IOC"
[1] "stock names : ITC"
[1] "stock names : JSW Steel"
[1] "stock names : Kotak Mahindra"
[1] "stock names : Larsen"
[1] "stock names : M&M"
[1] "stock names : Maruti Suzuki"
[1] "stock names : NTPC"
[1] "stock names : ONGC"
[1] "stock names : Power Grid Corp"
[1] "stock names : Reliance"
[1] "stock names : SBI"
[1] "stock names : Sun Pharma"
[1] "stock names : Tata Motors"
[1] "stock names : Tata Steel"
[1] "stock names : TCS"
[1] "stock names : Tech Mahindra"
[1] "stock names : Titan Company"
[1] "stock names : UltraTechCement"
[1] "stock names : UPL"
[1] "stock names : Vedanta"
[1] "stock names : Wipro"
[1] "stock names : Yes Bank"
[1] "stock names : Zee Entertain"
# Number of cleaned names (the recorded run gave 50, matching the table rows)
length(stock_list)
[1] 50
# Show the last cleaned name; indexing by length avoids assuming exactly 50 rows
stock_list[length(stock_list)]
[1] "Zee Entertain"
Create a new data frame of the stock price list
# Create dataframe of stock list: cleaned names plus scraped columns 2 to 6.
# stringsAsFactors = FALSE keeps text columns as character on R < 4.0, where
# data.frame() would otherwise convert them to factors (no effect on R >= 4.0).
df_nifty50 <- data.frame(
  stock_names = stock_list,
  urls[2:6],
  stringsAsFactors = FALSE
)
# Confirm the shape is unchanged from the scraped table
dim(df_nifty50)
[1] 50 6
# List the column names. Names such as "X.Chg" and "Mkt.Cap.Rs.cr." presumably
# come from data.frame() making the scraped headers syntactically valid; the
# original header text is not shown here.
names(df_nifty50)
[1] "stock_names" "Industry" "LastPrice" "Change" "X.Chg"
[6] "Mkt.Cap.Rs.cr."
# Preview the first rows of the combined data frame
head(df_nifty50)
Display the cleaned stock list
# Print the full data frame with the cleaned stock names
df_nifty50