Rohit @Strategic Leadership India

Web Scraping Amazon.in for the brand “Transcend”

# R Code Starts here ...
library(rvest)
library("rvest", lib.loc="~/R/win-library/3.1")
# Amazon.in search-results URL used by every scrape below; the query string
# carries the brand filter "Transcend".
url_amazon <-
  "http://www.amazon.in/s/ref=bl_dp_s_web_976392031?ie=UTF8&node=976392031&field-brandtextbin=Transcend"
# Echo the URL so it shows up in the console log.
url_amazon

[1] “http://www.amazon.in/s/ref=bl_dp_s_web_976392031?ie=UTF8&node=976392031&field-brandtextbin=Transcend”

#
# Probe: collect the text of every <h1> nested inside an element that has the
# class "titled" (CSS selector ".titled h1").
Amazon_titled_h1 <- local({
  page <- read_html(url_amazon)
  html_text(html_nodes(page, ".titled h1"))
})
# In the recorded run this printed character(0): nothing on the page matched
# the selector ".titled h1".
Amazon_titled_h1

character(0)

#
# Collect the text of every element carrying the class "a-text-normal".
Amazon_Normal_Text <- local({
  page <- read_html(url_amazon)
  html_text(html_nodes(page, ".a-text-normal"))
})
# First few matches for the CSS selector ".a-text-normal".
head(Amazon_Normal_Text)

[1] “1-24 of 263 results for Computers & Accessories : Transcend”
[2] “Computers & Accessories”
[3] “”
[4] “See available choices”
[5] “Transcend StoreJet 25M3 2.5-inch 1TB Portable External Hard Drive”
[6] “Transcend StoreJet 25M3 2.5-inch 1TB Portable External Hard Drive”

# Last six matches for the CSS selector ".a-text-normal".
tail(Amazon_Normal_Text, n = 6L)

[1] “”
[2] “Transcend JetFlash 820 16GB Pen Drive (Gold)”
[3] “Transcend JetFlash 820 16GB Pen Drive (Gold)”
[4] “  755.00”
[5] “2 offersfrom   749.00”
[6] “11”

#
# Narrow the previous query: keep only the "a-text-normal" elements that are
# the first child of their parent (CSS pseudo-class :nth-child(1)).
Amazon_Normal_Text1 <- url_amazon %>%
  read_html() %>%
  html_nodes(".a-text-normal:nth-child(1)") %>%
  html_text()
# First few matches for the CSS selector ".a-text-normal:nth-child(1)".
head(Amazon_Normal_Text1)

[1] “1-24 of 263 results for Computers & Accessories : Transcend”
[2] “Computers & Accessories”
[3] “”
[4] “See available choices”
[5] “Transcend StoreJet 25M3 2.5-inch 1TB Portable External Hard Drive”
[6] “Transcend StoreJet 25M3 2.5-inch 1TB Portable External Hard Drive”

#
# Wrap each scraped character vector in a one-column data frame (text is kept
# as character, not converted to factor) and open it in the data viewer.
df <- data.frame(
  Amazon_Normal_Text = Amazon_Normal_Text,
  stringsAsFactors = FALSE
)
View(df)

df1 <- data.frame(
  Amazon_Normal_Text1 = Amazon_Normal_Text1,
  stringsAsFactors = FALSE
)
View(df1)
#
# Fetch the page again and keep the "a-text-normal" elements that are the
# first child of their parent.
# NOTE(review): this selector is identical to the one used for
# Amazon_Normal_Text1, so the two vectors differ only if the page changed
# between fetches; confirm whether a different selector was intended here.
Amazon_Normal_Text2 <- url_amazon %>%
  read_html() %>%
  html_nodes(".a-text-normal:nth-child(1)") %>%
  html_text()
# First few matches for the CSS selector ".a-text-normal:nth-child(1)".
head(Amazon_Normal_Text2)

[1] “1-24 of 263 results for Computers & Accessories : Transcend”
[2] “Computers & Accessories”
[3] “”
[4] “See available choices”
[5] “Transcend StoreJet 25M3 2.5-inch 1TB Portable External Hard Drive”
[6] “Transcend StoreJet 25M3 2.5-inch 1TB Portable External Hard Drive”

# Last six matches for the CSS selector ".a-text-normal:nth-child(1)".
tail(Amazon_Normal_Text2, n = 6L)

[1] “6 offersfrom   3,795.00”
[2] “”
[3] “Transcend JetFlash 820 16GB Pen Drive (Gold)”
[4] “Transcend JetFlash 820 16GB Pen Drive (Gold)”
[5] “  755.00”
[6] “2 offersfrom   749.00”

#
# One-column data frame of the scraped text (kept as character, not factor),
# opened in the data viewer for inspection.
df2 <- data.frame(
  Amazon_Normal_Text2 = Amazon_Normal_Text2,
  stringsAsFactors = FALSE
)
View(df2)
#
# R Code Ends here ...


Contact…