# Plots

# Remote location of the listings CSV (fetched over HTTPS on every run).
myfile <- "https://raw.githubusercontent.com/ishantnayer/Rfiles/master/listings.csv"
# Read the whole file into a data frame using read.csv()'s defaults.
listings <- read.csv(file = myfile)

library(tidytext)
require(tidyverse)
## Loading required package: tidyverse
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
require(stringr)
## Loading required package: stringr
require(leaflet)
## Loading required package: leaflet
require(ggmap)
## Loading required package: ggmap
# Convert the price column from text to numeric.
# Both the dollar sign and any thousands separators are removed first:
# stripping only "$" leaves a value such as "1,250.00" behind, which
# as.numeric() cannot parse and silently turns into NA (with a warning).
# NOTE(review): assumes prices look like "$1,250.00" -- confirm against the CSV.
listings$price <- as.numeric(gsub("[$,]", "", listings$price))
# Make sure descriptions are plain character strings before tokenising them.
listings$description <- as.character(listings$description)

# Find the 8 neighbourhoods with the most listings.
# Result: one row per neighbourhood with its listing `count`, sorted from
# most to fewest listings. The ranking column is passed to top_n()
# explicitly instead of relying on its "use the last column" default, so the
# selection does not silently change if another column is added upstream.
# Note: top_n() keeps ties, so more than 8 rows can be returned.
top_neighbourhoods <- listings %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  top_n(8, wt = count)
# Keep only the listings located in one of the top neighbourhoods.
# A filtering join retains the rows (and columns) of `listings` whose
# neighbourhood appears in `top_neighbourhoods`, without adding anything.
top_listings <- semi_join(
  listings,
  top_neighbourhoods,
  by = "neighbourhood_cleansed"
)

# Break each listing description into one row per word.
# Stop words are removed, and only tokens made up entirely of lowercase
# letters and apostrophes are kept (drops numbers and other symbols).
top_listings_words <- top_listings %>%
  select(id, description, neighbourhood_cleansed, review_scores_rating) %>%
  unnest_tokens(output = word, input = description) %>%
  anti_join(stop_words, by = "word") %>%
  filter(str_detect(word, "^[a-z']+$"))

# Get the NRC word-sentiment lexicon as a (word, sentiment) table.
# get_sentiments() is tidytext's accessor for a named lexicon. Filtering the
# bundled `sentiments` data frame on a `lexicon` column only works on tidytext
# releases where that data frame still carries every lexicon.
# NOTE(review): recent tidytext releases fetch NRC through the `textdata`
# package and ask for a one-time licence confirmation -- confirm it is set up.
nrc <- get_sentiments("nrc") %>%
  dplyr::select(word, sentiment)

# Total number of (filtered) words per neighbourhood, attached to every
# listing id: one row per id / neighbourhood / total_words combination.
hood_tot_words <- top_listings_words %>%
  add_count(neighbourhood_cleansed, name = "total_words") %>%
  distinct(id, neighbourhood_cleansed, total_words)


# Count the words associated with each sentiment in each neighbourhood and
# express them as a percentage of that neighbourhood's total word count.
# Steps:
#   1. tag every word with its NRC sentiment(s);
#   2. count tagged words per (sentiment, listing id);
#   3. fill in missing (sentiment, id) pairs with a count of 0;
#   4. attach each listing's neighbourhood and that neighbourhood's word total
#      (join key given explicitly so it cannot change if the two tables ever
#      gain another column name in common);
#   5. sum per (neighbourhood, sentiment) and convert to a rounded percentage.
by_hood_sentiment <- top_listings_words %>%
  inner_join(nrc, by = "word") %>%
  count(sentiment, id) %>%
  ungroup() %>%
  complete(sentiment, id, fill = list(n = 0)) %>%
  inner_join(hood_tot_words, by = "id") %>%
  group_by(neighbourhood_cleansed, sentiment, total_words) %>%
  summarize(words = sum(n)) %>%
  mutate(prop = round(words / total_words * 100, digits = 1)) %>%
  ungroup()
# Faceted bar chart: one panel per sentiment, one bar per neighbourhood;
# bar height is the percentage stored in `prop`.
m <- ggplot(by_hood_sentiment, aes(x = neighbourhood_cleansed, y = prop)) +
  geom_col(fill = "orange") +
  facet_wrap(~sentiment) +
  labs(
    title = "Proportion wise Reviews",
    x = "Neighbourhood",
    y = "Proportion \n (sentiment word count / total word count)"
  ) +
  theme_minimal() +
  # Rotate the neighbourhood names so they do not overlap on the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

print(m)