Assignment 6

#install.packages("readxl")

library(readxl)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Load the listings data set into `df`.
# NOTE(review): the path ends in .csv but is opened with read_excel() --
# confirm the file is really an Excel workbook and consider renaming it.
df <- read_excel(path = "Airbnb_DC_25.csv")
# First look at the data: per-column summary statistics.
summary(df)
       id                   name         host_id              host_name   
 Min.   :3.686e+03   Length   :6257   Min.   :     4617   Length   :6257  
 1st Qu.:3.792e+07   N.unique :5996   1st Qu.: 22024017   N.unique :1860  
 Median :7.501e+17   N.blank  :   0   Median : 81005284   N.blank  :   0  
 Mean   :6.159e+17   Min.nchar:   4   Mean   :176451046   Min.nchar:   1  
 3rd Qu.:1.143e+18   Max.nchar: 107   3rd Qu.:304261532   Max.nchar:  34  
 Max.   :1.375e+18                    Max.   :681391481   NAs      :   2  
                                                                          
 neighbourhood_group   neighbourhood     latitude       longitude     
 Mode:logical        Length   :6257   Min.   :38.82   Min.   :-77.11  
 NAs :6257           N.unique :  39   1st Qu.:38.90   1st Qu.:-77.03  
                     N.blank  :   0   Median :38.91   Median :-77.01  
                     Min.nchar:  18   Mean   :38.91   Mean   :-77.01  
                     Max.nchar:  97   3rd Qu.:38.92   3rd Qu.:-76.99  
                                      Max.   :38.99   Max.   :-76.91  
                                                                      
     room_type        price        minimum_nights   number_of_reviews
 Length   :6257   Min.   :  10.0   Min.   :  1.00   Min.   :   0.00  
 N.unique :   4   1st Qu.:  88.0   1st Qu.:  1.00   1st Qu.:   1.00  
 N.blank  :   0   Median : 131.0   Median :  2.00   Median :  19.00  
 Min.nchar:  10   Mean   : 168.7   Mean   : 13.23   Mean   :  66.38  
 Max.nchar:  15   3rd Qu.: 193.0   3rd Qu.: 31.00   3rd Qu.:  86.00  
                  Max.   :7000.0   Max.   :701.00   Max.   :1205.00  
                  NAs    :1488                                       
  last_review                  reviews_per_month calculated_host_listings_count
 Min.   :2013-06-15 00:00:00   Min.   : 0.010    Min.   :  1.00                
 1st Qu.:2024-10-17 00:00:00   1st Qu.: 0.470    1st Qu.:  1.00                
 Median :2025-01-23 00:00:00   Median : 1.460    Median :  3.00                
 Mean   :2024-09-12 12:48:19   Mean   : 1.974    Mean   : 33.15                
 3rd Qu.:2025-02-27 00:00:00   3rd Qu.: 2.940    3rd Qu.: 14.00                
 Max.   :2025-03-14 00:00:00   Max.   :28.200    Max.   :289.00                
 NAs    :1236                  NAs    :1236                                    
 availability_365 number_of_reviews_ltm      license    
 Min.   :  0.0    Min.   :  0.0         Length   :6257  
 1st Qu.: 43.0    1st Qu.:  0.0         N.unique :2459  
 Median :175.0    Median :  5.0         N.blank  :   0  
 Mean   :175.8    Mean   : 15.8         Min.nchar:   6  
 3rd Qu.:303.0    3rd Qu.: 25.0         Max.nchar:  72  
 Max.   :365.0    Max.   :290.0         NAs      :1560  
                                                        
#view(df)
# One row per neighbourhood: row count, mean price, and mean minimum-night
# requirement (missing values are ignored in both means), sorted by name.
by_neighborhood <- df |>
  group_by(neighbourhood) |>
  summarise(
    count = n(),
    avg_cost = mean(price, na.rm = TRUE),
    avg_min_night = mean(minimum_nights, na.rm = TRUE),
    .groups = "drop"
  ) |>
  arrange(neighbourhood)

# Numeric matrix of the summary columns (everything except the neighbourhood
# label), with the neighbourhood names attached as row names.
by_neighborhood_matrix <- by_neighborhood |>
  select(-neighbourhood) |>
  data.matrix()
rownames(by_neighborhood_matrix) <- by_neighborhood$neighbourhood
#str(by_neighborhood)
#heatmap by neighborhood
library(viridis)
Loading required package: viridisLite
# Draw a heatmap of the neighbourhood summary matrix and keep heatmap()'s
# return value. Rowv/Colv = NA keep rows and columns in their existing order
# with no dendrograms; scale = "column" standardises each measure separately
# so columns on very different scales share one colour ramp.
by_neighborhood_heatmap <- heatmap(
  by_neighborhood_matrix,
  main = "Heatmap of Airbnb Neighborhoods in Washington, DC",
  xlab = "",
  ylab = "Neighborhoods",
  col = viridis(25),
  scale = "column",
  Rowv = NA,
  Colv = NA,
  cexRow = 0.4,
  cexCol = 0.5,
  margins = c(10, 15)
)