Visuals should be created in R with ggplot2. Provide all relevant code, comments, and interpretive insights in an R Notebook. Two or more of your five visuals must highlight, contest, incorporate, address, or otherwise relate to two of Rosling’s instincts that we have discussed (i.e., Gap, Negativity, Straight Line, Fear, or Size). It is up to you how you address, incorporate, or dispel these instincts in your visuals.
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.0 ✔ stringr 1.2.0
## ✔ readr 1.1.1 ✔ forcats 0.2.0
## ── Conflicts ───────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(RColorBrewer)
library(ggmap)
library(caret)
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
# Load the raw listings export; the literal string ".." is the only value
# treated as missing while parsing.
listings_path <- "~/Documents/Classes(School)/2018-19/Geospatial Visualization/LA Airbnb Exercise/listings.csv"
AirbnbLA <- read_csv(file = listings_path, na = "..")
## Parsed with column specification:
## cols(
## .default = col_character(),
## id = col_integer(),
## scrape_id = col_double(),
## last_scraped = col_date(format = ""),
## host_id = col_integer(),
## host_since = col_date(format = ""),
## host_listings_count = col_integer(),
## host_total_listings_count = col_integer(),
## latitude = col_double(),
## longitude = col_double(),
## accommodates = col_integer(),
## bathrooms = col_double(),
## bedrooms = col_integer(),
## beds = col_integer(),
## square_feet = col_integer(),
## guests_included = col_integer(),
## minimum_nights = col_integer(),
## maximum_nights = col_integer(),
## availability_30 = col_integer(),
## availability_60 = col_integer(),
## availability_90 = col_integer()
## # ... with 14 more columns
## )
## See spec(...) for full column specifications.
# Preview the first six rows of the raw listings table.
AirbnbLA %>% head(n = 6L)
# Convert currency strings such as "$1,250.00" to plain numbers.
# Both the dollar sign and the thousands separator must be stripped:
# as.numeric("1,250.00") is NA, so removing only "$" silently turned every
# amount of 1,000 or more into NA (most likely the cause of the
# "NAs introduced by coercion" warnings this step used to emit).
parse_dollars <- function(x) {
  as.numeric(gsub("[$,]", "", x))
}

# Keep Los Angeles listings, drop incomplete rows, and retain only the
# columns used by the plots below.
# NOTE(review): na.omit() runs before select(), so a missing value in ANY
# column of the raw table removes the row, not just the columns kept here.
# Confirm that this is intended.
Los_Angeles <- AirbnbLA %>%
  filter(city == "Los Angeles") %>%
  na.omit() %>%
  select(
    city, cleaning_fee, price, property_type, room_type, accommodates,
    host_since, host_is_superhost, weekly_price, monthly_price,
    minimum_nights, number_of_reviews, review_scores_rating,
    review_scores_accuracy, review_scores_cleanliness,
    review_scores_communication, review_scores_location,
    latitude, longitude
  )

# Numeric versions of the currency columns (the originals stay as text).
Los_Angeles$price_2 <- parse_dollars(Los_Angeles$price)
Los_Angeles$cleaning_fee_2 <- parse_dollars(Los_Angeles$cleaning_fee)
Los_Angeles$weekly_price_2 <- parse_dollars(Los_Angeles$weekly_price)
Los_Angeles$monthly_price_2 <- parse_dollars(Los_Angeles$monthly_price)

# Listings priced at or below 450; rows whose price is NA are dropped too,
# because filter() only keeps rows where the condition is TRUE.
Los_Angeles_Pricing <- Los_Angeles %>%
  filter(price_2 <= 450)
# Scatter plot of listing price against guest capacity, with the point fill
# showing the property type.
ggplot(Los_Angeles, aes(x = price_2, y = accommodates, fill = property_type)) +
  # shape 21 is a filled circle with a separate outline, so `fill` carries the
  # property type while the white outline separates overlapping points.
  geom_point(size = 4, shape = 21, color = "white", stroke = 1.5) +
  scale_fill_brewer(palette = "Paired") +
  labs(
    title = "Rental types vs Pricing vs Accommodates",
    subtitle = "Airbnb Los Angeles",
    caption = "Source: Inside Airbnb",
    x = "Price",
    y = "Accommodates"
  ) +
  # theme_bw() is a complete theme and replaces any theme() settings added
  # before it, so every tweak (including the x-axis label rotation, which was
  # previously discarded) has to come after it.
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
## Warning: Removed 1 rows containing missing values (geom_point).
# Number of reviews per listing against the date the host joined, coloured
# by superhost status.
ggplot(
  Los_Angeles,
  aes(x = host_since, y = number_of_reviews, color = host_is_superhost)
) +
  geom_point() +
  # A bare geom_abline() draws y = x (intercept 0, slope 1). On a date axis x
  # is stored as days since 1970, so that line sits far above any review
  # count and never appears. A fitted linear trend per group is drawn instead.
  # NOTE(review): a fitted straight line is assumed to be the intent; confirm.
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    title = "Number of Reviews Over Time",
    subtitle = "Airbnb Los Angeles",
    caption = "Source: Inside Airbnb",
    x = "Host since",
    y = "Number of reviews"
  ) +
  # theme_bw() is a complete theme and replaces any theme() settings added
  # before it, so the axis-label rotation must be applied afterwards.
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
# Listing price against the date the host joined, outlined by superhost
# status, for listings in the price-capped subset.
# NOTE(review): geom_col() stacks bars that share an x value, so each bar
# shows the SUM of prices for hosts who joined on that date, and `color`
# only sets the bar outline. Confirm this matches the intended reading of
# "price increases over time".
ggplot(
  Los_Angeles_Pricing,
  aes(x = host_since, y = price_2, color = host_is_superhost)
) +
  geom_col() +
  labs(
    title = "Host vs Superhost Price Increases Over Time",
    subtitle = "Airbnb Los Angeles",
    caption = "Source: Inside Airbnb",
    x = "Host since",
    y = "Price"
  ) +
  # theme_bw() is a complete theme and replaces any theme() settings added
  # before it, so the axis-label rotation (previously discarded) is applied
  # afterwards together with the other tweaks.
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
# Price against cleaning fee for the price-capped listings, with point colour
# encoding the overall review score on a yellow-to-red gradient.
ggplot(
  data = Los_Angeles_Pricing,
  mapping = aes(
    x = cleaning_fee_2,
    y = price_2,
    colour = review_scores_rating
  )
) +
  geom_point() +
  scale_colour_distiller(palette = "YlOrRd", direction = 1) +
  labs(
    title = "How Prices and Cleaning Fees Affect Reviews",
    subtitle = "Airbnb Los Angeles",
    caption = "Source: Inside Airbnb",
    x = "Cleaning fee",
    y = "Price"
  ) +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black")
  )
## Warning: Removed 10 rows containing missing values (geom_point).
# Build a bounding box around every listing, padded by 10% of the data's
# extent on each side, and download the matching basemap tiles. `map` is the
# object that ggmap() draws further down.
lat_range <- range(Los_Angeles$latitude)
lon_range <- range(Los_Angeles$longitude)

height <- diff(lat_range)
width <- diff(lon_range)

LA_borders <- c(
  bottom = lat_range[1] - 0.1 * height,
  top = lat_range[2] + 0.1 * height,
  left = lon_range[1] - 0.1 * width,
  right = lon_range[2] + 0.1 * width
)

# NOTE(review): newer ggmap releases are believed to serve these tiles via
# Stadia Maps instead; confirm get_stamenmap() still works with the installed
# ggmap version.
map <- get_stamenmap(bbox = LA_borders, zoom = 10, maptype = "toner-lite")
## Map from URL : http://tile.stamen.com/toner-lite/10/174/408.png
## Map from URL : http://tile.stamen.com/toner-lite/10/175/408.png
## Map from URL : http://tile.stamen.com/toner-lite/10/174/409.png
## Map from URL : http://tile.stamen.com/toner-lite/10/175/409.png
# Overlay every listing on the basemap, coloured by price on a yellow-to-red
# gradient.
ggmap(map) +
  geom_point(
    mapping = aes(x = longitude, y = latitude, colour = price_2),
    data = Los_Angeles
  ) +
  scale_colour_distiller(direction = 1, palette = "YlOrRd")