# Import the libraries needed
library(selectr)
library(xml2)
library(rvest)
library(stringr)
library(jsonlite)
# Link to the website
# Address of the Amazon India product page to scrape.
url <- "https://www.amazon.in/dp/B08L5WF8L4/ref=redir_mobile_desktop?_encoding=UTF8&aaxitk=8LXMMgZaS7fY7FFArslRPw&hsa_cr_id=9712803540902&pd_rd_plhdr=t&pd_rd_r=fcf3689f-d4d4-494d-8775-99ea28a08785&pd_rd_w=Z3FEp&pd_rd_wg=MdcaA&ref_=sbx_be_s_sparkle_td_asin_0_title"
# Fetch the page and parse it into an HTML document that the extraction
# steps query with CSS selectors.
Amazonwebpage <- url %>% read_html()
# Extract the title from the Amazon page
# Select the <h1 id="title"> node and read its raw text.
title_html <- Amazonwebpage %>% html_nodes("h1#title")
title <- title_html %>% html_text()
head(title)
## [1] "\n\n\n\n\n\n\n\n\nNew Apple iPhone 12 Mini (128GB) - White\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
# Delete every carriage return and line feed from the title.
title <- str_remove_all(title, "[\r\n]")
head(title)
## [1] "New Apple iPhone 12 Mini (128GB) - White"
# Extract the price from the Amazon page
# Select the <span id="priceblock_ourprice"> node and read its text.
price_html <- Amazonwebpage %>% html_nodes("span#priceblock_ourprice")
price <- price_html %>% html_text()
head(price)
## [1] "₹ 69,900.00"
# Delete any carriage returns or line feeds; the price stays a string.
price <- str_remove_all(price, "[\r\n]")
head(price)
## [1] "₹ 69,900.00"
# Extract the product description from the Amazon page
# Select the feature-bullets container and read its raw text.
desc_html <- html_nodes(Amazonwebpage, "div#featurebullets_feature_div")
desc <- html_text(desc_html)
head(desc)
## [1] "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n5.4-inch (13.7 cm diagonal) Super Retina XDR display\n\n\n\n\nCeramic Shield, tougher than any smartphone glass\n\n\n\n\nA14 Bionic chip, the fastest chip ever in a smartphone\n\n\n\n\nAdvanced dual-camera system with 12MP Ultra Wide and Wide cameras; Night mode, Deep Fusion, Smart HDR 3, 4K Dolby Vision HDR recording\n\n\n\n\n12MP TrueDepth front camera with Night mode, 4K Dolby Vision HDR recording\n\n\n\n\nIndustry-leading IP68 water resistance\n\n\n\n\nSupports MagSafe accessories for easy attach and faster wireless charging\n\n\n\n\niOS with redesigned widgets on the Home screen, all-new App Library, App Clips and more\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n"
# Each bullet sits on its own line, so simply deleting the line breaks would
# fuse neighbouring bullets together ("...displayCeramic Shield...").
# Trim the outer whitespace first, then replace every run of whitespace that
# contains a line break with a visible " | " separator.
# NOTE: sample output elsewhere in this file that embeds the description
# (the str() and JSON listings) was captured before this separator existed.
desc <- str_trim(desc)
desc <- str_replace_all(desc, "\\s*[\r\n]+\\s*", " | ")
head(desc)
## [1] "5.4-inch (13.7 cm diagonal) Super Retina XDR display | Ceramic Shield, tougher than any smartphone glass | A14 Bionic chip, the fastest chip ever in a smartphone | Advanced dual-camera system with 12MP Ultra Wide and Wide cameras; Night mode, Deep Fusion, Smart HDR 3, 4K Dolby Vision HDR recording | 12MP TrueDepth front camera with Night mode, 4K Dolby Vision HDR recording | Industry-leading IP68 water resistance | Supports MagSafe accessories for easy attach and faster wireless charging | iOS with redesigned widgets on the Home screen, all-new App Library, App Clips and more"
# Extract the product rating from the Amazon page
# Select the <span id="acrPopover"> node and read its raw text.
rate_html <- Amazonwebpage %>% html_nodes("span#acrPopover")
rate <- rate_html %>% html_text()
head(rate)
## [1] "\n\n\n\n4.5 out of 5 stars\n\n\n\n\n"
# Delete line breaks, then trim whitespace left at either end.
rate <- rate %>%
  str_remove_all("[\r\n]") %>%
  str_trim()
head(rate)
## [1] "4.5 out of 5 stars"
# Extract the product size from the Amazon page
# Narrow to the style-variation container, then to the span that holds the
# currently selected option, and read its text.
size_html <- Amazonwebpage %>%
  html_nodes("div#variation_style_name") %>%
  html_nodes("span.selection")
size <- size_html %>% html_text()
head(size)
## [1] "\n\n128GB\n\n"
# Delete every carriage return and line feed from the size label.
size <- str_remove_all(size, "[\r\n]")
head(size)
## [1] "128GB"
# Extract the product color from the Amazon page
# Narrow to the color-variation container, then to the span that holds the
# currently selected option, and read its text.
color_html <- Amazonwebpage %>%
  html_nodes("div#variation_color_name") %>%
  html_nodes("span.selection")
color <- color_html %>% html_text()
head(color)
## [1] "\n\nWhite\n\n"
# Trim leading and trailing whitespace (here, the surrounding line breaks).
color <- color %>% str_trim()
head(color)
## [1] "White"
# Combine all the fields into a data frame
# Assemble the scraped fields into a data frame, one column per field.
# A CSS selector that matches nothing yields character(0), and data.frame()
# aborts ("arguments imply differing number of rows") when a zero-length
# column is combined with non-empty ones. Replace any empty field with NA so
# the values that were found are still captured. Non-empty fields pass
# through unchanged; if every field is empty the result is one all-NA row.
product_data <- data.frame(
  lapply(
    list(
      Title = title, Price = price, Description = desc,
      Rating = rate, Size = size, Color = color
    ),
    function(field) if (length(field) == 0) NA_character_ else field
  ),
  # Keep text columns as character on R < 4.0 as well, matching the str()
  # listing below.
  stringsAsFactors = FALSE
)
# Structure of the data frame
str(product_data)
## 'data.frame': 1 obs. of 6 variables:
## $ Title : chr "New Apple iPhone 12 Mini (128GB) - White"
## $ Price : chr "₹ 69,900.00"
## $ Description: chr "5.4-inch (13.7 cm diagonal) Super Retina XDR displayCeramic Shield, tougher than any smartphone glassA14 Bionic"| __truncated__
## $ Rating : chr "4.5 out of 5 stars"
## $ Size : chr "128GB"
## $ Color : chr "White"
# Store it in JSON format
# Serialise the data frame to JSON (one object per row) and write the
# resulting string to the console.
json_data <- jsonlite::toJSON(product_data)
cat(json_data)
## [{"Title":"New Apple iPhone 12 Mini (128GB) - White","Price":"₹ 69,900.00","Description":"5.4-inch (13.7 cm diagonal) Super Retina XDR displayCeramic Shield, tougher than any smartphone glassA14 Bionic chip, the fastest chip ever in a smartphoneAdvanced dual-camera system with 12MP Ultra Wide and Wide cameras; Night mode, Deep Fusion, Smart HDR 3, 4K Dolby Vision HDR recording12MP TrueDepth front camera with Night mode, 4K Dolby Vision HDR recordingIndustry-leading IP68 water resistanceSupports MagSafe accessories for easy attach and faster wireless chargingiOS with redesigned widgets on the Home screen, all-new App Library, App Clips and more","Rating":"4.5 out of 5 stars","Size":"128GB","Color":"White"}]