# Plots
# Location of the listings CSV (raw file served from GitHub).
myfile <- "https://raw.githubusercontent.com/ishantnayer/Rfiles/master/listings.csv"
# Download and parse the CSV into a data frame.
listings <- read.csv(file = myfile)
# Attach every package with library(): a missing package then stops the
# script right here. require() only warns and returns FALSE, which lets the
# script continue until a later, much less obvious "could not find function"
# error.
library(tidytext)
# Attaching tidyverse brings in ggplot2, tibble, tidyr, readr, purrr and dplyr;
# dplyr's filter() and lag() mask the versions from stats.
library(tidyverse)
library(stringr)
library(leaflet)
library(ggmap)
# Convert the price column to numeric.
# Both the currency symbol and the thousands separators are stripped: removing
# only "$" leaves values such as "1,200.00", which as.numeric() cannot parse
# and silently turns into NA (with a coercion warning).
listings$price <- as.numeric(gsub("[$,]", "", listings$price))
# Make sure the description is plain character text (not a factor) before it
# is tokenised further down.
listings$description <- as.character(listings$description)
# Rank neighbourhoods by number of listings and keep the eight largest.
# The ranking column is passed to top_n() explicitly instead of relying on its
# default of "whatever the last column happens to be", so the result does not
# depend on column order. Note that top_n() keeps ties, so more than eight
# rows can be returned when several neighbourhoods share the eighth count.
top_neighbourhoods <- listings %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  top_n(8, count)
# Keep only the listings located in one of the top neighbourhoods.
top_listings <- filter(
  listings,
  neighbourhood_cleansed %in% top_neighbourhoods$neighbourhood_cleansed
)
# Split each description into one row per word, then discard stop words and
# any token that is not made up solely of lower-case letters and apostrophes.
top_listings_words <- top_listings %>%
  select(id, description, neighbourhood_cleansed, review_scores_rating) %>%
  unnest_tokens(output = word, input = description) %>%
  filter(!(word %in% stop_words$word)) %>%
  filter(str_detect(word, "^[a-z']+$"))
# Word-to-sentiment lookup table from the NRC lexicon.
# get_sentiments() is tidytext's accessor for a specific lexicon. Filtering
# the bundled `sentiments` dataset on a `lexicon` column only works on old
# tidytext releases; newer releases ship that dataset without the column, so
# the filter fails there.
# NOTE(review): on recent tidytext versions get_sentiments("nrc") may need the
# `textdata` package and a one-time interactive download -- confirm in the
# target environment.
nrc <- get_sentiments("nrc") %>%
  dplyr::select(word, sentiment)
# Attach to every word row the total number of (filtered) words found in its
# neighbourhood, then reduce to one row per listing id carrying that
# neighbourhood-level total.
hood_tot_words <- top_listings_words %>%
  group_by(neighbourhood_cleansed) %>%
  mutate(total_words = n()) %>%
  ungroup() %>%
  select(id, neighbourhood_cleansed, total_words) %>%
  distinct()
# For each neighbourhood and sentiment, count the sentiment-bearing words and
# express them as a percentage of all words in that neighbourhood.
by_hood_sentiment <- top_listings_words %>%
  # Tag every word with its NRC sentiment(s); words without one are dropped.
  inner_join(nrc, by = "word") %>%
  # Words per sentiment per listing.
  count(sentiment, id) %>%
  ungroup() %>%
  # Add explicit zero rows for sentiment/listing pairs that never occurred.
  complete(sentiment, id, fill = list(n = 0)) %>%
  # Bring in each listing's neighbourhood and that neighbourhood's word total.
  # The key is spelled out so the join cannot silently change if either table
  # later gains another column with a shared name.
  inner_join(hood_tot_words, by = "id") %>%
  group_by(neighbourhood_cleansed, sentiment, total_words) %>%
  summarise(words = sum(n)) %>%
  # Percentage of the neighbourhood's words, rounded to one decimal place.
  mutate(prop = round(words / total_words * 100, digits = 1)) %>%
  ungroup()
# Bar chart of the sentiment-word percentage per neighbourhood, with one
# panel per sentiment.
# NOTE(review): the title says "Reviews", but the words being counted come
# from the listing `description` column -- confirm the intended wording.
m <- ggplot(by_hood_sentiment, aes(x = neighbourhood_cleansed, y = prop)) +
  geom_bar(stat = "identity", fill = "orange") +
  facet_wrap(~ sentiment) +
  labs(
    title = "Proportion wise Reviews",
    x = "Neighbourhood",
    y = "Proportion \n (sentiment word count / total word count)"
  ) +
  theme_minimal() +
  # Rotate the neighbourhood labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(m)
