# Load the three CSV inputs from the working directory as plain data.frames.
# `data.table = FALSE` makes fread() return a data.frame directly instead of
# a data.table that then has to be converted.
listings <- fread("listings.csv", data.table = FALSE)
reviews <- fread("reviews.csv", data.table = FALSE)
neighbourhoods <- fread("neighbourhoods.csv", data.table = FALSE)

# Show what is in the working directory (rendered output follows).
list.files()
##  [1] "$RECYCLE.BIN"               "Bellboy Logistics.accdb"   
##  [3] "Custom Office Templates"    "Database1.accdb"           
##  [5] "Database2.accdb"            "desktop.ini"               
##  [7] "FINAL EXAM.accdb"           "Flight Database.accdb"     
##  [9] "Hotel ERD.accdb"            "IS Project.pptx"           
## [11] "listings.csv"               "My SAS Files"              
## [13] "My Tableau Repository"      "neighbourhoods.csv"        
## [15] "R"                          "R-Assignment.html"         
## [17] "R-Assignment.Rmd"           "R_Projects"                
## [19] "reviews.csv"                "RPAIAD-Learn-student-files"
## [21] "Rscript1414d80ff3"          "RtmpArRStp"                
## [23] "RtmpU5fZnU"                 "Thumbs.db"
# ---- Listings per neighbourhood, stacked by room type ----

# One row per (neighbourhood, room_type) pair with its listing count `n`;
# rows missing either field are excluded. The dplyr:: prefix guards against a
# same-named count() from another attached package being picked up instead.
new_df <- listings %>%
  filter(!is.na(neighbourhood), !is.na(room_type)) %>%
  dplyr::count(neighbourhood, room_type) %>%
  as.data.frame()

# Total listings per neighbourhood (sum of `n` over room types); used for the
# value labels at the end of each bar and for sizing the count axis.
agg_tot <- new_df %>%
  dplyr::count(neighbourhood, wt = n, name = "tot") %>%
  as.data.frame()

# Upper axis limit: largest neighbourhood total rounded up to a multiple of 500.
max_y <- round_any(max(agg_tot$tot), 500, ceiling)

ggplot(
  new_df,
  aes(x = reorder(neighbourhood, n, sum), y = n, fill = room_type)
) +
  # Bars ordered by total listings; stack order reversed, and the legend is
  # reversed below so both read in the same direction.
  geom_col(position = position_stack(reverse = TRUE)) +
  # Per-neighbourhood total printed just past the end of each bar.
  geom_text(
    data = agg_tot,
    aes(x = neighbourhood, y = tot, label = scales::comma(tot), fill = NULL),
    hjust = -0.1,
    size = 2
  ) +
  scale_y_continuous(
    labels = comma,
    breaks = seq(0, max_y, by = 500),
    limits = c(0, max_y)
  ) +
  scale_fill_brewer(
    palette = "Paired",
    guide = guide_legend(reverse = TRUE)
  ) +
  coord_flip() +
  labs(
    title = "Milan Airbnbs: Listings by Neighbourhood",
    x = "",
    y = "Amount of Listings",
    fill = "Room Type"
  ) +
  theme_light() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.y = element_text(size = 7)
  )

# ---- Reviews per calendar month ----

# Count reviews per calendar month. `month` is a "YYYY-MM" key; rows whose
# `date` cannot be parsed by ymd() are dropped before counting.
reviews_df <- reviews %>%
  mutate(date = ymd(date)) %>%
  filter(!is.na(date)) %>%
  mutate(month = format(date, '%Y-%m')) %>%
  group_by(month) %>%
  summarise(n = n(), .groups = 'keep') %>%
  data.frame()

# First day of each month as a Date, so the x axis is a real time axis rather
# than a set of character labels.
reviews_df$month_date <- ymd(paste0(reviews_df$month, "-01"))

# Months with the fewest and the most reviews (ties are all kept); these get
# a larger marker and a text label on the chart.
hi_lo <- reviews_df %>%
  filter(n == min(n) | n == max(n)) %>%
  data.frame()

ggplot(reviews_df, aes(x = month_date, y = n)) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0 and triggers a deprecation warning.
  geom_line(color = 'black', linewidth = 1) +
  geom_point(shape = 21, size = 3,
             color = 'black', fill = 'white') +
  labs(title = "Milan Airbnbs: Reviews by Month",
       x = "Month", y = "Amount of Reviews") +
  scale_y_continuous(labels = comma) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Emphasise the extreme months with a slightly larger marker ...
  geom_point(data = hi_lo,
             aes(x = month_date, y = n),
             shape = 21, size = 4,
             fill = 'white', color = 'black') +
  # ... and a "Mon YYYY: count" label placed by ggrepel to avoid overlaps.
  geom_label_repel(data = hi_lo,
                   aes(label = paste0(format(month_date, '%b %Y'),
                                      ": ", scales::comma(n))),
                   size = 4,
                   color = 'grey50',
                   segment.color = 'darkblue')

# ---- Listings per room type ----

# Number of listings for each non-missing room type (count column is `n`).
# The dplyr:: prefix guards against a same-named count() from another
# attached package being picked up instead.
room_df <- listings %>%
  filter(!is.na(room_type)) %>%
  dplyr::count(room_type) %>%
  as.data.frame()

# Upper axis limit: largest room-type count rounded up to a multiple of 500.
max_y2 <- round_any(max(room_df$n), 500, ceiling)

ggplot(
  room_df,
  aes(x = reorder(room_type, n), y = n, fill = room_type)
) +
  # One horizontal bar per room type, ordered by count.
  geom_col(position = position_stack()) +
  scale_y_continuous(
    labels = comma,
    breaks = seq(0, max_y2, by = 500),
    limits = c(0, max_y2)
  ) +
  scale_fill_brewer(palette = "Paired") +
  coord_flip() +
  labs(
    title = "Milan Airbnbs: Listings by Room Type",
    x = "",
    y = "Amount of Listings",
    fill = "Room Type"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

# ---- Listings per year of last review ----

# Count listings by the calendar year of their `last_review` date. Listings
# whose `last_review` is missing or cannot be parsed by ymd() are dropped.
# NOTE(review): the year here is taken from `last_review`, so the chart shows
# when listings were last reviewed -- the plot title does not say this.
year_df <- listings %>%
  select(last_review) %>%
  mutate(last_review = ymd(last_review),
         year = year(last_review)) %>%
  filter(!is.na(year)) %>%
  group_by(year) %>%
  summarise(n = n(), .groups = 'keep') %>%
  data.frame()

ggplot(year_df, aes(x = year, y = n)) +
  # Line thickness is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0 and triggers a deprecation warning.
  geom_line(color = "black", linewidth = 1) +
  geom_point(shape = 21, size = 3,
             color = "black", fill = "white") +
  labs(title = "Milan Airbnbs: Listings by Year",
       x = "Year", y = "Amount of Listings") +
  scale_y_continuous(labels = comma) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))

# ---- Room type share (pie chart) ----

# Number of listings for each non-missing room type (count column is `n`).
# The dplyr:: prefix guards against a same-named count() from another
# attached package being picked up instead.
pie_df <- listings %>%
  filter(!is.na(room_type)) %>%
  dplyr::count(room_type) %>%
  as.data.frame()

# A single bar filled to 100% and wrapped into polar coordinates gives a pie
# whose slices are each room type's share of all listings.
ggplot(pie_df, aes(x = "", y = n, fill = room_type)) +
  geom_col(position = "fill") +
  coord_polar(theta = "y", start = 0) +
  scale_fill_brewer(palette = "Paired") +
  labs(
    title = "Milan Airbnbs: Room Type Share",
    x = NULL,
    y = NULL,
    fill = "Room Type"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5))