# Read the three CSV inputs as plain data.frames (rather than data.tables).
listings <- fread("listings.csv", data.table = FALSE)
reviews <- fread("reviews.csv", data.table = FALSE)
neighbourhoods <- fread("neighbourhoods.csv", data.table = FALSE)

# Print the contents of the current working directory.
list.files()
## [1] "$RECYCLE.BIN" "Bellboy Logistics.accdb"
## [3] "Custom Office Templates" "Database1.accdb"
## [5] "Database2.accdb" "desktop.ini"
## [7] "FINAL EXAM.accdb" "Flight Database.accdb"
## [9] "Hotel ERD.accdb" "IS Project.pptx"
## [11] "listings.csv" "My SAS Files"
## [13] "My Tableau Repository" "neighbourhoods.csv"
## [15] "R" "R-Assignment.html"
## [17] "R-Assignment.Rmd" "R_Projects"
## [19] "reviews.csv" "RPAIAD-Learn-student-files"
## [21] "Rscript1414d80ff3" "RtmpArRStp"
## [23] "RtmpU5fZnU" "Thumbs.db"
# Number of listings for every neighbourhood / room-type pair; rows missing
# either field are excluded.
new_df <- listings %>%
  select(neighbourhood, room_type) %>%
  filter(!(is.na(neighbourhood) | is.na(room_type))) %>%
  group_by(neighbourhood, room_type) %>%
  summarise(n = n(), .groups = "drop") %>%
  as.data.frame()

# Per-neighbourhood totals, used for the label printed at the end of each bar.
agg_tot <- new_df %>%
  group_by(neighbourhood) %>%
  summarise(tot = sum(n), .groups = "drop") %>%
  as.data.frame()

# Upper axis bound: largest total rounded up to the next multiple of 500.
max_y <- round_any(max(agg_tot[["tot"]]), accuracy = 500, f = ceiling)

# Horizontal stacked bars, one per neighbourhood, ordered by total listings.
ggplot(
  new_df,
  aes(x = reorder(neighbourhood, n, FUN = sum), y = n, fill = room_type)
) +
  geom_col(position = position_stack(reverse = TRUE)) +
  # Totals come from agg_tot, which has no room_type column, so the inherited
  # fill mapping is switched off for this layer.
  geom_text(
    data = agg_tot,
    aes(x = neighbourhood, y = tot, label = scales::comma(tot), fill = NULL),
    hjust = -0.1,
    size = 2
  ) +
  coord_flip() +
  scale_y_continuous(
    labels = scales::comma,
    breaks = seq(0, max_y, by = 500),
    limits = c(0, max_y)
  ) +
  # Legend order is reversed to match the reversed stacking order.
  scale_fill_brewer(
    palette = "Paired",
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    title = "Milan Airbnbs: Listings by Neighbourhood",
    x = "",
    y = "Amount of Listings",
    fill = "Room Type"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(size = 7)
  )

# Monthly review counts: parse the review date, drop rows whose date could
# not be parsed, then count reviews per "YYYY-MM" month key.
reviews_df <- reviews %>%
  mutate(date = ymd(date)) %>%
  filter(!is.na(date)) %>%
  mutate(month = format(date, "%Y-%m")) %>%
  group_by(month) %>%
  summarise(n = n(), .groups = "keep") %>%
  data.frame()

# First day of each month as a Date, so the x axis is a real time scale.
reviews_df$month_date <- ymd(paste0(reviews_df$month, "-01"))

# Months with the fewest and the most reviews; these get highlighted/labelled.
hi_lo <- reviews_df %>%
  filter(n == min(n) | n == max(n)) %>%
  data.frame()

ggplot(reviews_df, aes(x = month_date, y = n)) +
  # `linewidth` (not `size`) controls line thickness since ggplot2 3.4.0;
  # `size` on lines is deprecated and triggers a lifecycle warning.
  geom_line(color = "black", linewidth = 1) +
  geom_point(
    shape = 21, size = 3,
    color = "black", fill = "white"
  ) +
  labs(
    title = "Milan Airbnbs: Reviews by Month",
    x = "Month", y = "Amount of Reviews"
  ) +
  scale_y_continuous(labels = comma) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Slightly larger markers drawn on top of the min/max months.
  geom_point(
    data = hi_lo,
    aes(x = month_date, y = n),
    shape = 21, size = 4,
    fill = "white", color = "black"
  ) +
  # Repelled labels of the form "Mon YYYY: count" for the min/max months.
  geom_label_repel(
    data = hi_lo,
    aes(label = paste0(
      format(month_date, "%b %Y"),
      ": ", scales::comma(n)
    )),
    size = 4,
    color = "grey50",
    segment.color = "darkblue"
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Listings per room type, skipping rows with a missing room type.
room_df <- listings %>%
  filter(!is.na(room_type)) %>%
  group_by(room_type) %>%
  summarise(n = n(), .groups = "drop") %>%
  as.data.frame()

# Upper axis bound: largest count rounded up to the next multiple of 500.
max_y2 <- round_any(max(room_df[["n"]]), accuracy = 500, f = ceiling)

# Horizontal bars, one per room type, ordered by number of listings.
ggplot(
  room_df,
  aes(x = reorder(room_type, n), y = n, fill = room_type)
) +
  geom_col(position = "stack") +
  coord_flip() +
  scale_y_continuous(
    labels = scales::comma,
    breaks = seq(0, max_y2, by = 500),
    limits = c(0, max_y2)
  ) +
  scale_fill_brewer(palette = "Paired") +
  labs(
    title = "Milan Airbnbs: Listings by Room Type",
    x = "",
    y = "Amount of Listings",
    fill = "Room Type"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

# Listings counted by the calendar year of their most recent review; listings
# whose last_review is missing or unparseable are dropped.
year_df <- listings %>%
  select(last_review) %>%
  mutate(
    last_review = ymd(last_review),
    year = year(last_review)
  ) %>%
  filter(!is.na(year)) %>%
  group_by(year) %>%
  summarise(n = n(), .groups = "keep") %>%
  data.frame()

ggplot(year_df, aes(x = year, y = n)) +
  # `linewidth` (not `size`) controls line thickness since ggplot2 3.4.0;
  # `size` on lines is deprecated and triggers a lifecycle warning.
  geom_line(color = "black", linewidth = 1) +
  geom_point(
    shape = 21, size = 3,
    color = "black", fill = "white"
  ) +
  labs(
    title = "Milan Airbnbs: Listings by Year",
    x = "Year", y = "Amount of Listings"
  ) +
  scale_y_continuous(labels = comma) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

# Listings per room type (rows with a missing room type excluded).
pie_df <- listings %>%
  filter(!is.na(room_type)) %>%
  group_by(room_type) %>%
  summarise(n = n(), .groups = "drop") %>%
  as.data.frame()

# Pie chart: a single filled stacked bar wrapped onto polar coordinates, so
# each wedge's angle reflects that room type's share of listings.
ggplot(pie_df, aes(x = "", y = n, fill = room_type)) +
  geom_col(position = "fill") +
  coord_polar(theta = "y", start = 0) +
  scale_fill_brewer(palette = "Paired") +
  labs(
    title = "Milan Airbnbs: Room Type Share",
    x = NULL,
    y = NULL,
    fill = "Room Type"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))
