Week 7 class

Plots

# Location of the listings CSV (raw file hosted on GitHub).
myfile <- 'https://raw.githubusercontent.com/ishantnayer/Rfiles/master/listings.csv'

# Read the CSV into a data frame, one row per listing.
listings <- read.csv(file = myfile)

library(tidytext)
require(tidyverse)

## Loading required package: tidyverse

## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr

## Conflicts with tidy packages ----------------------------------------------

## filter(): dplyr, stats
## lag():    dplyr, stats

require(stringr)

## Loading required package: stringr

require(leaflet)

## Loading required package: leaflet

require(ggmap)

## Loading required package: ggmap

# Convert the price text to a number.
# Both the currency symbol and comma separators are stripped before the
# conversion: removing only "$" leaves any remaining non-numeric characters in
# place, and as.numeric() then turns those values into NA with a coercion
# warning.
# NOTE(review): assumes the unparsable prices were values with thousands
# separators such as "$1,200.00" -- confirm against the raw column.
listings$price <- as.numeric(gsub("[$,]", "", listings$price))

# Make sure descriptions are plain character strings before they are tokenised.
listings$description <- as.character(listings$description)

# Get the top 8 neighbourhoods, ranked by number of listings.
# The ranking column is passed to top_n() explicitly; without it top_n() falls
# back to the last column of the table and prints "Selecting by count", which
# would silently pick a different column if the summary ever gained one.
# Note: top_n() keeps ties, so more than 8 rows can be returned when several
# neighbourhoods share the 8th-largest count.
top_neighbourhoods <- listings %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  top_n(8, count)

# Keep only the listings located in one of the top neighbourhoods.
top_listings <- filter(
  listings,
  is.element(
    neighbourhood_cleansed,
    top_neighbourhoods[["neighbourhood_cleansed"]]
  )
)

# Split each description into one row per word, then keep only words made up
# of lowercase letters/apostrophes that are not in the stop-word list.
top_listings_words <- top_listings %>%
  select(id, description, neighbourhood_cleansed, review_scores_rating) %>%
  unnest_tokens(word, description) %>%
  filter(str_detect(word, "^[a-z']+$")) %>%
  filter(!(word %in% stop_words$word))

# Get the NRC word-sentiment lexicon as a (word, sentiment) lookup table.
# get_sentiments() is used instead of filtering the `sentiments` dataset on a
# `lexicon` column, because newer tidytext releases ship `sentiments` without
# that column, which makes the filter fail.
# NOTE(review): recent tidytext versions obtain the NRC lexicon through the
# textdata package on first use -- confirm it is available where this runs.
nrc <- get_sentiments("nrc") %>%
  dplyr::select(word, sentiment)

# Total number of retained words per neighbourhood.
# Step 1: tag every word row with the word count of its neighbourhood.
hood_tot_words <- top_listings_words %>%
  group_by(neighbourhood_cleansed) %>%
  mutate(total_words = n()) %>%
  ungroup()

# Step 2: collapse to one row per listing id, carrying its neighbourhood and
# that neighbourhood's total, so it can later be joined back by id.
hood_tot_words <- distinct(
  hood_tot_words,
  id, neighbourhood_cleansed, total_words
)


# Count the words associated with each sentiment in each neighbourhood and
# express them as a percentage of that neighbourhood's total word count.
by_hood_sentiment <- top_listings_words %>%
  # keep only words present in the lexicon, tagged with their sentiment
  inner_join(nrc, by = "word") %>%
  # number of sentiment words per (sentiment, listing)
  count(sentiment, id) %>%
  ungroup() %>%
  # add explicit zero rows for sentiment/listing pairs that never occurred
  complete(sentiment, id, fill = list(n = 0)) %>%
  # attach each listing's neighbourhood and the neighbourhood word total;
  # the key is named explicitly so the join cannot silently change if the two
  # tables ever come to share another column
  inner_join(hood_tot_words, by = "id") %>%
  group_by(neighbourhood_cleansed, sentiment, total_words) %>%
  summarize(words = sum(n)) %>%
  # percentage of the neighbourhood's words, rounded to one decimal place
  mutate(prop = round(words / total_words * 100, digits = 1)) %>%
  ungroup()

# Bar chart of the sentiment-word percentage per neighbourhood,
# drawn as one panel per sentiment.
m <- ggplot(
  by_hood_sentiment,
  aes(x = neighbourhood_cleansed, y = prop)
) +
  geom_bar(stat = "identity", fill = "orange") +
  facet_wrap(~ sentiment) +
  labs(
    title = "Proportion wise Reviews",
    x = "Neighbourhood",
    y = "Proportion \n (sentiment word count / total word count)"
  ) +
  theme_minimal() +
  # rotate the neighbourhood names so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Render the plot.
print(m)

Week 7 class

Ishant Nayer

12/3/2016

Plots