Function to get amenity data for AirBnb

# Import the data 
airbnb_data <- read_csv(file = 'up_to_date_clean_data_version_one_bedroom.csv')
## Rows: 3789 Columns: 76
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (32): host_name, listing_url, name, description, neighborhood_overview, ...
## dbl (43): id, host_id, scrape_id, host_is_superhost_bin, host_listings_count...
## lgl  (1): has_availability
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Sort the dataset by expected revenue in descending order
sortedairbnb_data <- airbnb_data %>% arrange(desc(full_year_expected_revenue)) 

# add row id
sortedairbnb_data <- sortedairbnb_data %>%
  mutate(unique_id = row_number())

write_xlsx(sortedairbnb_data, "airbnb_cleaned_data.xlsx")

# get top 200 listings based on revenue 
top_200_revenue <- sortedairbnb_data %>%
  top_n(200, full_year_expected_revenue)

# Extract the ammenity column with arrays of strings
amenitiesArrayColumn <- top_200_revenue$amenities 
#for full count switch to sortedairbnb_data

# Split the amenities into separate strings
splitAmmentityStrings <- strsplit(amenitiesArrayColumn, ",\\s*")

# Create new data frame with split ammentity strings
df <- data.frame(strings = unlist(splitAmmentityStrings))

# Count the frequencies of amenities 
ammentiyCounts <- df %>%
  group_by(strings) %>%
  summarize(count = n()) %>%
  arrange(desc(count))

# Print the ordered data frame
print.data.frame(ammentiyCounts)
##                                                       strings count
## 1                                               "Smoke alarm"   192
## 2                                                      "Wifi"   185
## 3                                     "Carbon monoxide alarm"   175
## 4                                                "Hair dryer"   175
## 5                                                      "Iron"   169
## 6                                                   "Shampoo"   161
## 7                                                   "Hangers"   156
## 8                                                   "Heating"   155
## 9                                                "Essentials"   144
## 10                                                "Hot water"   139
## 11                                             "Refrigerator"   134
## 12                                               "Bed linens"   131
## 13                                                  "Kitchen"   115
## 14                                                "Microwave"   115
## 15                                            "First aid kit"   113
## 16                                             "Coffee maker"   110
## 17                                        "Fire extinguisher"   108
## 18                                                       "TV"   103
## 19                                               "Dishwasher"   101
## 20                                    "Dishes and silverware"   100
## 21                                         "Air conditioning"    99
## 22                                                   "Washer"    96
## 23                                                 "Elevator"    95
## 24                                      "Dedicated workspace"    91
## 25                                         "Private entrance"    79
## 26                                           "Cooking basics"    72
## 27                                            "Self check-in"    72
## 28                               "Extra pillows and blankets"    70
## 29                                  "Luggage dropoff allowed"    66
## 30                                                    ["Oven"    60
## 31                                           "Building staff"    57
## 32                                             "Pets allowed"    57
## 33                                                  "Bathtub"    55
## 34                                                    "Dryer"    55
## 35                                  "Long term stays allowed"    52
## 36                                 "Paid parking on premises"    49
## 37                                   "TV with standard cable"    48
## 38                                                      "Gym"    46
## 39                                           ["Self check-in"    44
## 40                                "Paid parking off premises"    43
## 41                                         "Hot water kettle"    41
## 42                                              ["Essentials"    41
## 43                                                 "Lockbox"]    40
## 44                                                    "Stove"    39
## 45                                                "Body soap"    37
## 46                                             "Dining table"    37
## 47                                                   "Coffee"    36
## 48                                     "Lock on bedroom door"    36
## 49                                                     "Gym"]    35
## 50                                      "Private living room"    35
## 51                                               "Shower gel"    35
## 52                                                   "Dryer"]    33
## 53                                      "Ethernet connection"    33
## 54                                                  "Freezer"    32
## 55                                                 "Backyard"    30
## 56                                        "City skyline view"    30
## 57                                      "Free street parking"    30
## 58                                       "Exercise equipment"    28
## 59                                             "Wine glasses"    28
## 60                                        "Cleaning products"    27
## 61                                              "Conditioner"    27
## 62                                 "Free parking on premises"    27
## 63                                                 "Kitchen"]    27
## 64                                                  "Toaster"    26
## 65                              "Dryer \\u2013\\u00a0In unit"    25
## 66                                    "Room-darkening shades"    25
## 67                             "Washer \\u2013\\u00a0In unit"    25
## 68                                         "Patio or balcony"    24
## 69                                                     "Crib"    23
## 70                                                 "Heating"]    22
## 71                                         "Indoor fireplace"    22
## 72                                        "Outdoor furniture"    20
## 73                                                     "Safe"    20
## 74                                 "Central air conditioning"    19
## 75                                          "Host greets you"    19
## 76                           "Pack \\u2019n play/Travel crib"    19
## 77                                       "Single level home"]    17
## 78                                             "Baking sheet"    16
## 79                                                "Breakfast"    16
## 80                                       "Fire extinguisher"]    16
## 81                           "Paid valet parking on premises"    16
## 82                             "Security cameras on property"    16
## 83                                                DVD player"    16
## 84                               "Babysitter recommendations"    15
## 85                               "Books and reading material"    15
## 86                               "Free dryer \\u2013 In unit"    15
## 87                                            "Resort access"    15
## 88                                                  "Blender"    14
## 89                                                    "Sauna"    14
## 90                                                "BBQ grill"    13
## 91                                  "HDTV with standard cable    13
## 92                                 "Private patio or balcony"    13
## 93                            ["Security cameras on property"    13
## 94                                          "Central heating"    12
## 95                                      "Outdoor dining area"    12
## 96                                   "Shared gym in building"    12
## 97                                                 ["Shampoo"    12
## 98                                                 "Bay view"    11
## 99                              "Free washer \\u2013 In unit"    11
## 100                                                    "Pool"    11
## 101                                           "Portable fans"    10
## 102                                        "Clothing storage"     9
## 103                                       "Laundromat nearby"     9
## 104                                              "Smart lock"     9
## 105                                                   Netflix     9
## 106                                             "Board games"     8
## 107                                             "Ceiling fan"     8
## 108                          "Cleaning available during stay"     8
## 109                                                "Fire pit"     8
## 110                                                  "Keypad"     8
## 111                                  ["Carbon monoxide alarm"     8
## 112                         "Coffee maker: drip coffee maker"     7
## 113                          "Free dryer \\u2013 In building"     7
## 114                         "Free washer \\u2013 In building"     7
## 115                   "Private backyard \\u2013 Fully fenced"     7
## 116                                            "Sound system"     7
## 117                                    "Stainless steel oven"     7
## 118                                                      Hulu     7
## 119                    "Clothing storage: dresser and closet"     6
## 120                                             "Mini fridge"     6
## 121                                              "Pool table"     6
## 122                                 "Shared patio or balcony"     6
## 123                     "Coffee maker: Keurig coffee machine"     5
## 124                                          "Shared hot tub"     5
## 125                                ["Luggage dropoff allowed"     5
## 126                                                   Disney+     5
## 127                                                   HBO Max     5
## 128                                                  Netflix"     5
## 129                                                   "Bidet"     4
## 130                                       "Bose sound system"     4
## 131                                "Drying rack for clothing"     4
## 132                                            "Game console"     4
## 133                                               "Gas stove"     4
## 134                                                 "Hot tub"     4
## 135                                              "Ocean view"     4
## 136                                                    "Oven"     4
## 137                                        Amazon Prime Video     4
## 138                                            premium cable"     4
## 139                    "Clothing storage: closet and dresser"     3
## 140                                 "Coffee maker: Nespresso"     3
## 141                                              "EV charger"     3
## 142                                        "Fireplace guards"     3
## 143                                             "Garden view"     3
## 144                                     "HDTV with Chromecast     3
## 145                                             "Pocket wifi"     3
## 146                                              "Rice maker"     3
## 147                    "Shared backyard \\u2013 Fully fenced"     3
## 148                               "Stainless steel gas stove"     3
## 149                                              "Waterfront"     3
## 150                          ["55\\" HDTV with standard cable     3
## 151                                         ["Cooking basics"     3
## 152                       ["Paid parking garage off premises"     3
## 153                                             premium cable     3
## 154                                                     Roku"     3
## 155                    "Bertazzoni stainless steel gas stove"     2
## 156                                                   "Bikes"     2
## 157                                "Clothing storage: closet"     2
## 158            "Clothing storage: walk-in closet and closet"]     2
## 159                              "Clothing storage: wardrobe"     2
## 160                          "Coffee maker: drip coffee maker     2
## 161                          "Coffee maker: pour-over coffee"     2
## 162                                          "Courtyard view"     2
## 163                                          "Electric stove"     2
## 164                           "Exercise equipment: elliptical     2
## 165                                      "Free resort access"     2
## 166                             "HDTV with Amazon Prime Video     2
## 167                                   "Indoor fireplace: gas"     2
## 168                                           "Mountain view"     2
## 169                          "Paid dryer \\u2013 In building"     2
## 170                           "Paid parking lot off premises"     2
## 171                        "Paid street parking off premises"     2
## 172                         "Paid washer \\u2013 In building"     2
## 173                                         "Portable heater"     2
## 174                                         "Radiant heating"     2
## 175                                           "Record player"     2
## 176                                         "Smoking allowed"     2
## 177                                            "Sun loungers"     2
## 178                                   "TV with standard cable     2
## 179                                                   "Wifi"]     2
## 180                                                  Apple TV     2
## 181                                                 Apple TV"     2
## 182                                              free weights     2
## 183                                         pour-over coffee"     2
## 184                                                      Roku     2
## 185                                           stationary bike     2
## 186                                                 yoga mat"     2
## 187                                 "100\\" TV with Apple TV"     1
## 188                       "43\\" HDTV with Amazon Prime Video     1
## 189                               "50\\" HDTV with Chromecast     1
## 190                                  "50\\" HDTV with Disney+     1
## 191                                 "50\\" HDTV with Netflix"     1
## 192                          "50\\" HDTV with standard cable"     1
## 193                                  "55\\" HDTV with HBO Max     1
## 194                       "60\\" HDTV with Amazon Prime Video     1
## 195                       "65\\" HDTV with Amazon Prime Video     1
## 196                                 "75\\" HDTV with Apple TV     1
## 197                                 "Backyard - Fully fenced"     1
## 198                                       "Barbecue utensils"     1
## 199                         "Beach access \\u2013 Beachfront"     1
## 200                                            "Beach access"     1
## 201                                        "Beach essentials"     1
## 202                                              "Beach view"     1
## 203                                  "Bluetooth sound system"     1
## 204                                             "Bosch  oven"     1
## 205                                            "Bosch  stove"     1
## 206                    "Bosch stainless steel electric stove"     1
## 207                              "Bosch stainless steel oven"     1
## 208                   "Bose Sound Bar Bluetooth sound system"     1
## 209   "Children\\u2019s books and toys for ages 0-2 years old     1
## 210                         "Children\\u2019s books and toys"     1
## 211                             "Children\\u2019s dinnerware"     1
## 212                               "Clothing storage: dresser"     1
## 213                        "Clothing storage: walk-in closet"     1
## 214                               "Clothing storage: wardrobe     1
## 215                                       "Clothing storage"]     1
## 216                                  "Coffee maker: Nespresso     1
## 217                           "Dcs stainless steel gas stove"     1
## 218                   "Dr. Bronner's Pure Castille body soap"     1
## 219                               "Dryer \\u2013 In building"     1
## 220                                "Eco Friendly conditioner"     1
## 221                                  "EO lavender  body soap"     1
## 222                         "Exercise equipment: free weights     1
## 223                            "Exercise equipment: yoga mat"     1
## 224                              "Fast wifi \\u2013 124 Mbps"     1
## 225                              "Fast wifi \\u2013 259 Mbps"     1
## 226                              "Fast wifi \\u2013 275 Mbps"     1
## 227                              "Fast wifi \\u2013 328 Mbps"     1
## 228                              "Fast wifi \\u2013 337 Mbps"     1
## 229                              "Fast wifi \\u2013 458 Mbps"     1
## 230                              "Fast wifi \\u2013 564 Mbps"     1
## 231                               "Fast wifi \\u2013 78 Mbps"     1
## 232                               "Fast wifi \\u2013 86 Mbps"     1
## 233                "Free carport on premises \\u2013 1 space"     1
## 234      "Free driveway parking on premises \\u2013 2 spaces"     1
## 235                                              "Free dryer"     1
## 236         "Free parking garage on premises \\u2013 1 space"     1
## 237     "Free residential garage on premises \\u2013 1 space"     1
## 238                                             "Free washer"     1
## 239                      "Gaggenau stainless steel gas stove"     1
## 240                                           "Gas oven oven"     1
## 241                                         "Gym in building"     1
## 242                                             "Hair dryer"]     1
## 243                                                 "Hammock"     1
## 244                                             "Harbor view"     1
## 245                                        "HDTV with Disney+     1
## 246                                        "HDTV with HBO Max     1
## 247                                           "HDTV with Hulu     1
## 248                                          "HDTV with Roku"     1
## 249                                "HDTV with standard cable"     1
## 250                                "Honest Company body soap"     1
## 251                              "Honest Company conditioner"     1
## 252                                  "Honest Company shampoo"     1
## 253                                                 "Lockbox"     1
## 254                                             "Marina view"     1
## 255                               "Multiple brands body soap"     1
## 256                             "Multiple brands conditioner"     1
## 257                                 "Multiple brands shampoo"     1
## 258                                         "Outdoor kitchen"     1
## 259                                          "Outdoor shower"     1
## 260                                           "Outlet covers"     1
## 261 "Pack \\u2019n play/Travel crib - available upon request"     1
## 262                        "Paid parking garage off premises"     1
## 263         "Paid parking garage on premises \\u2013 1 space"     1
## 264                                      "Paid resort access"     1
## 265          "Paid valet parking on premises \\u2013 1 space"     1
## 266                                               "Pool view"     1
## 267                               "Portable air conditioning"     1
## 268                                       "Private entrance"]     1
## 269                                 "Private gym in building"     1
## 270                                         "Private hot tub"     1
## 271                           "Public or shared beach access"     1
## 272                                          "Record player"]     1
## 273                                       "Samsung gas stove"     1
## 274                                   "Shared BBQ grill: gas"     1
## 275                              "Shared indoor pool - heated     1
## 276                                  "Shared outdoor kitchen"     1
## 277                         "Shared pool - available all year     1
## 278                                             "Shared pool"     1
## 279                                             "Single oven"     1
## 280                                            "Smoke alarm"]     1
## 281                            "Sonos Bluetooth sound system"     1
## 282             "Sonos Surround Sound Bluetooth sound system"     1
## 283                     "Sound system with Bluetooth and aux"     1
## 284           "Standalone high chair - always at the listing"     1
## 285                                   "Sub zero refrigerator"     1
## 286                                          "TV with Netflix     1
## 287                                         "TV with Netflix"     1
## 288                                             "Valley view"     1
## 289                        "Washer \\u2013\\u00a0In building"     1
## 290                                    "Wifi \\u2013 43 Mbps"     1
## 291                                           "Window guards"     1
## 292                                          "Wolf gas stove"     1
## 293                                               "Wolf oven"     1
## 294                                       "Wolf refrigerator"     1
## 295                               "Wolf stainless steel oven"     1
## 296                                         ["Building staff"     1
## 297                                           ["Coffee maker"     1
## 298                                                   ["Crib"     1
## 299                             ["Fast wifi \\u2013 322 Mbps"     1
## 300                                          ["HDTV with Hulu     1
## 301                                             ["Pool table"     1
## 302                                           ["Shared sauna"     1
## 303                                   ["Wifi \\u2013 43 Mbps"     1
## 304                                             2-5 years old     1
## 305                                       Amazon Prime Video"     1
## 306                                       and 5-10 years old"     1
## 307                                               and closet"     1
## 308                                                Chromecast     1
## 309                                                  Disney+"     1
## 310                                              DVD player"]     1
## 311                                                  Fire TV"     1
## 312                                              french press     1
## 313                                                     Hulu"     1
## 314                                    Keurig coffee machine"     1
## 315                                                  lap pool     1
## 316                                                  rooftop"     1
## 317                                                saltwater"     1
## 318                                            standard cable     1
## 319                                                 treadmill     1
## 320                                                treadmill"     1
## 321                                            walk-in closet     1
#write data frame to excel
write_xlsx(ammentiyCounts, "amenity_counts_new_version.xlsx")

script to analyze revenue with top ammentites

# import same airbnb data as above 
airbnb_data_2 <- read_csv(file = 'up_to_date_clean_data_version_one_bedroom.csv')
## Rows: 3789 Columns: 76
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (32): host_name, listing_url, name, description, neighborhood_overview, ...
## dbl (43): id, host_id, scrape_id, host_is_superhost_bin, host_listings_count...
## lgl  (1): has_availability
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# add row id
airbnb_data_2 <- airbnb_data_2 %>%
  mutate(unique_id = row_number())

# Create list of amenities of interest (top 10 amenities based on count of ammentity listed for top revenue listings)
essentials <- c("Smoke", "Wifi", "Hangers", "Carbon", "Hair", "Water", "Iron", "Essentials", "Shampoo", "Heating")

# Duplicate airbnb_data
essential_amenities_df <- data.frame(airbnb_data_2)

# Iterate over amenities in essentials
for (amenity in essentials) {
  essential_amenities_df <- essential_amenities_df %>%
    # Create boolean column
    mutate(!!amenity :=
      str_detect(
        amenities,
        regex(amenity, ignore_case = TRUE)
      ))
}

# Group by listing
listing_essential_amenities <- essential_amenities_df %>%
  group_by(unique_id, id, full_year_expected_revenue) %>%
  summarise(
    Smoke_Alarm = any(Smoke),
    Wifi = any(Wifi),
    Hangers = any(Hangers),
    Carbon_Monoxide_Alarm = any(Carbon),
    Hair_Dryer = any(Hair),
    Hot_Water = any(Water),
    Iron = any(Iron),
    Essentials = any(Essentials),
    Shampoo = any(Shampoo),
    Heating = any(Heating)
  ) %>%
  pivot_longer(
    cols = -unique_id,
    names_to = "amenities",
    values_to = "listed"
  )
## `summarise()` has grouped output by 'unique_id', 'id'. You can override using the `.groups` argument.
# Merge price data
listing_essential_amenities <- listing_essential_amenities %>%
  left_join(essential_amenities_df %>% select(id, full_year_expected_revenue, unique_id), by = "unique_id")

write_xlsx(listing_essential_amenities, "amenity_ListingId_revenue_new_updated.xlsx")

# update column type to numeric for listed column
listing_essential_amenities$listed <- as.numeric(listing_essential_amenities$listed)

# Aggregate the data using summarizing function 
df_summarized <- listing_essential_amenities %>%
  group_by(unique_id, id, full_year_expected_revenue, amenities) %>%
  summarise(listed = max(as.integer(listed))) %>%
  ungroup()
## `summarise()` has grouped output by 'unique_id', 'id', 'full_year_expected_revenue'. You can override using the `.groups` argument.
# Spread the dataframe to create new columns for each ammentiy
df_spread <- spread(df_summarized, key = amenities, value = listed, fill = 0)

# Print the spread dataframe
print(df_spread)
## # A tibble: 3,789 × 13
##    unique_id Carbon_Monoxide_A… Essentials full_year_expecte… Hair_Dryer Hangers
##        <int>              <dbl>      <dbl>              <dbl>      <dbl>   <dbl>
##  1         1                  1          1              47874          1       1
##  2         2                  1          0                  0          0       1
##  3         3                  1          1              22660          1       1
##  4         4                  1          1               3400          0       1
##  5         5                  1          1                  0          1       1
##  6         6                  1          1              17050          1       1
##  7         7                  1          1               3055          1       1
##  8         8                  1          0                  0          0       0
##  9         9                  1          1               7420          1       1
## 10        10                  1          1              66220          1       1
## # … with 3,779 more rows, and 7 more variables: Heating <dbl>, Hot_Water <dbl>,
## #   id <dbl>, Iron <dbl>, Shampoo <dbl>, Smoke_Alarm <dbl>, Wifi <dbl>
# Sort the dataset by expected revenue in descending order
sortedairbnb_data2 <- df_spread %>% arrange(desc(full_year_expected_revenue)) 

# add row id
sortedairbnb_data2 <- sortedairbnb_data2 %>%
  mutate(unique_id = row_number())

write_xlsx(sortedairbnb_data2, "ammenties_final_pivot.xlsx")