# Page to scrape.
url <- "https://doesitreallywork.org/red-light-therapy/"

# Download the page, select every <p> node, and store the text of each node
# as one row of a single-column data frame (column name: `data`).
df <- data.frame(
  data = html_text(
    html_nodes(read_html(url), "p")
  )
)

# Number of rows, i.e. number of <p> nodes found.
nrow(df)
## [1] 326
# Strip the literal text "Reply" from every scraped entry.
df$data <- gsub("Reply", "", df$data)

# Flag empty entries as "Yes"/"No" in a single vectorized pass.
# Vectorizing (instead of looping over 1:nrow(df)) keeps this safe when the
# scrape returns zero rows, where 1:0 would iterate over c(1, 0) and fail.
# Missing values are treated as blank so they cannot break the comparison.
df$blank <- ifelse(is.na(df$data) | df$data == "", "Yes", "No")

# Keep only the non-blank rows; the `blank` flag column stays on `df`.
df <- df[df$blank == "No", ]
# Preview the first two entries that remain in df$data.
head(df$data,2) # first two entries
## [1] "There is a newly discovered method in anti-aging technology called red light therapy. This system is mostly offered in salons, although you also have the option of setting one up at home. The system uses infrared lights as the main feature of this machine and it has been known to provide therapeutic benefits to the body."
## [2] "Anti-aging systems, techniques, and methods always get attention from health-conscious and beauty-conscious individuals. Beauty plays a major role in this modern age as celebrities and icons seem to be aging gracefully through the years. Surely, you would want to look as good too even if you are growing older every year. And with all the modern technology available today, staying young and looking young is not only possible, but achievable as well."
# Histogram of the number of characters in each entry, drawn with green bars.
hist(nchar(df$data), col = "green")
# Observation from the histogram: most of the comments look to be about 300
# characters long, or roughly 60 words. OK, back to work.
# Split each entry of `data` into one row per word (new column `word`), then
# drop every row whose word appears in `stop_words`. The join key is left
# implicit, so the "Joining, by" message below is still emitted.
tidy <- anti_join(
  unnest_tokens(df, word, data),
  stop_words
)
## Joining, by = "word"

# First ten remaining tokens.
head(tidy, n = 10)
## blank word
## 1 No newly
## 2 No discovered
## 3 No method
## 4 No anti
## 5 No aging
## 6 No technology
## 7 No called
## 8 No red
## 9 No light
## 10 No therapy
# Keep only the tokens present in the "bing" sentiment lexicon, then count
# each (word, sentiment) pair, most frequent first.
sentiment <- count(
  inner_join(tidy, get_sentiments("bing")),
  word, sentiment,
  sort = TRUE
)
## Joining, by = "word"

# Ten most frequent sentiment-bearing words.
head(sentiment, n = 10)
## # A tibble: 10 x 3
## word sentiment n
## <chr> <chr> <int>
## 1 pain negative 33
## 2 beauty positive 13
## 3 unlimited positive 12
## 4 love positive 11
## 5 angel positive 10
## 6 amazing positive 8
## 7 bad negative 7
## 8 wow positive 7
## 9 benefits positive 6
## 10 effective positive 6
# Reshape the counts into a word-by-sentiment matrix (missing combinations
# filled with 0) and draw a comparison cloud of at most 100 words.
comparison.cloud(
  acast(sentiment, word ~ sentiment, value.var = "n", fill = 0),
  colors = c("steelblue", "red"),
  max.words = 100
)