library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
library(leaflet)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(caTools)
calendar <- read_csv("/cloud/project/01_data/boston/calendar.csv")
## Rows: 1139165 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): price, adjusted_price
## dbl (3): listing_id, minimum_nights, maximum_nights
## lgl (1): available
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# get an overview of the data (calendar)
str(calendar)
## spec_tbl_df [1,139,165 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ listing_id : num [1:1139165] 3781 3781 3781 3781 1374434 ...
## $ date : Date[1:1139165], format: "2021-09-19" "2021-09-20" ...
## $ available : logi [1:1139165] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ price : chr [1:1139165] "$125.00" "$125.00" "$125.00" "$125.00" ...
## $ adjusted_price: chr [1:1139165] "$125.00" "$125.00" "$125.00" "$125.00" ...
## $ minimum_nights: num [1:1139165] 32 32 32 32 28 28 28 28 28 28 ...
## $ maximum_nights: num [1:1139165] 1125 1125 1125 1125 270 ...
## - attr(*, "spec")=
## .. cols(
## .. listing_id = col_double(),
## .. date = col_date(format = ""),
## .. available = col_logical(),
## .. price = col_character(),
## .. adjusted_price = col_character(),
## .. minimum_nights = col_double(),
## .. maximum_nights = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
summary(calendar)
## listing_id date available price
## Min. : 3781 Min. :2021-09-19 Mode :logical Length:1139165
## 1st Qu.:18194673 1st Qu.:2021-12-19 FALSE:578436 Class :character
## Median :35425876 Median :2022-03-20 TRUE :560729 Mode :character
## Mean :31911601 Mean :2022-03-20
## 3rd Qu.:47487652 3rd Qu.:2022-06-19
## Max. :52324087 Max. :2022-09-18
## adjusted_price minimum_nights maximum_nights
## Length:1139165 Min. : 1.0 Min. :1.000e+00
## Class :character 1st Qu.: 2.0 1st Qu.:5.000e+02
## Mode :character Median : 29.0 Median :1.125e+03
## Mean : 74.6 Mean :4.129e+06
## 3rd Qu.: 91.0 3rd Qu.:1.125e+03
## Max. :1000.0 Max. :2.147e+09
head(calendar)
# Transform price as numeric
calendar$price <- str_remove_all(calendar$price, "[$]")
calendar$price <- str_remove_all(calendar$price, "[,]")
calendar$price <- as.numeric(calendar$price)
# Transform available as character
calendar$available <- ifelse(calendar$available == TRUE, "t", "f")
# Transform date as Date
calendar$date <- as.Date(calendar$date)
# pre-set theme for upcoming visualisations
theme_set(theme_bw())
avail_by_date <- calendar %>%
group_by(date, available) %>%
summarise(n = n()) %>%
filter(available == "t")
## `summarise()` has grouped output by 'date'. You can override using the `.groups` argument.
avail_by_date$available <- NULL
ggplot(data = avail_by_date, aes(x = date, y = n)) +
geom_line() +
ggtitle("Airbnb Availability in Boston") +
labs(x = "Date", y = "# of available Flats",
subtitle = "365 days outlook starting September 2021")
# create index of available Airbnb
# index in which the max value represents 1 and min value represents 0
# the rest is mapped relatively to max and min
avail_by_date$indexed_n <- (avail_by_date$n - min(avail_by_date$n))/
(max(avail_by_date$n) - min(avail_by_date$n))
days <- avail_by_date$indexed_n
library(calendR)
## ~~ Package calendR
## Visit https://r-coder.com/ for R tutorials ~~
calendR(start_date = "2021-09-19",
end_date = "2022-09-18",
title = "Yearly Availability Index Of All Listings",
special.days = days,
special.col = "#00AAAE",
gradient = TRUE,
legend.title = "Availability Index",
legend.pos = "right",
weeknames = c("M", "T", "W", "T", "F", "S", "S"))
# get the average Price per day, since we have the available flats per day
# therefore create new df with avail and price per day
avail_price_by_date <- calendar %>%
group_by(date) %>%
filter(available == "t") %>%
summarise(price = mean(price), available = n())
# calculate correlation between available and price
cor(avail_price_by_date$price, avail_price_by_date$available)
## [1] -0.6206325
# the correlation coefficient between the 2 variables is approx. -0.62
# the correlation is negative!
# if the price is high, the availability related to it is in tendency lower
# however correlation is not causation
# one cannot tell which leads to which
ggplot(data = avail_price_by_date, aes(x = price, y = available)) +
geom_point() +
ggtitle("Prices against Availability Airbnb") +
labs(x = "avg price per day", y = "avg available listings per day",
subtitle = "Negative Correlation between Price and Available Airbnb Listings in Boston") +
annotate(geom = "text", x = 250, y = 1700, label = "correlation = -0.62")
# load listings and get an overview of the dataset
listings <- read_csv("/cloud/project/01_data/boston/listings.csv")
## Rows: 3123 Columns: 20
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): price, neighbourhood_cleansed, property_type, room_type, host_acc...
## dbl (11): id, latitude, longitude, availability_30, beds, bedrooms, review_...
## lgl (3): neighbourhood_group_cleansed, bathrooms, host_is_superhost
## date (1): host_since
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(listings)
summary(listings)
## id price neighbourhood_cleansed
## Min. : 3781 Length:3123 Length:3123
## 1st Qu.:18197318 Class :character Class :character
## Median :35467463 Mode :character Mode :character
## Mean :31919334
## 3rd Qu.:47464676
## Max. :52324087
##
## neighbourhood_group_cleansed latitude longitude bathrooms
## Mode:logical Min. :42.24 Min. :-71.17 Mode:logical
## NA's:3123 1st Qu.:42.32 1st Qu.:-71.10 NA's:3123
## Median :42.34 Median :-71.07
## Mean :42.34 Mean :-71.08
## 3rd Qu.:42.35 3rd Qu.:-71.06
## Max. :42.40 Max. :-70.99
##
## availability_30 beds bedrooms review_scores_value
## Min. : 0.000 Min. : 0.000 Min. : 1.000 Min. :1.000
## 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.:4.500
## Median : 3.000 Median : 1.000 Median : 1.000 Median :4.725
## Mean : 8.508 Mean : 1.658 Mean : 1.464 Mean :4.624
## 3rd Qu.:15.000 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.:4.880
## Max. :30.000 Max. :22.000 Max. :13.000 Max. :5.000
## NA's :110 NA's :440 NA's :861
## minimum_nights maximum_nights availability_365 property_type
## Min. : 1.00 Min. : 1.0 Min. : 0.0 Length:3123
## 1st Qu.: 2.00 1st Qu.: 365.0 1st Qu.: 50.5 Class :character
## Median : 29.00 Median :1125.0 Median :160.0 Mode :character
## Mean : 39.33 Mean : 756.6 Mean :179.1
## 3rd Qu.: 91.00 3rd Qu.:1125.0 3rd Qu.:321.0
## Max. :1000.00 Max. :9999.0 Max. :365.0
##
## room_type host_since host_is_superhost host_acceptance_rate
## Length:3123 Min. :2008-12-03 Mode :logical Length:3123
## Class :character 1st Qu.:2014-07-15 FALSE:2320 Class :character
## Mode :character Median :2016-05-16 TRUE :803 Mode :character
## Mean :2016-05-24
## 3rd Qu.:2018-10-03
## Max. :2021-09-04
##
## calculated_host_listings_count
## Min. : 1.00
## 1st Qu.: 1.00
## Median : 5.00
## Mean : 23.43
## 3rd Qu.: 26.00
## Max. :168.00
##
str(listings)
## spec_tbl_df [3,123 × 20] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ id : num [1:3123] 3781 5506 6695 8789 10730 ...
## $ price : chr [1:3123] "$125.00" "$124.00" "$169.00" "$110.00" ...
## $ neighbourhood_cleansed : chr [1:3123] "East Boston" "Roxbury" "Roxbury" "Downtown" ...
## $ neighbourhood_group_cleansed : logi [1:3123] NA NA NA NA NA NA ...
## $ latitude : num [1:3123] 42.4 42.3 42.3 42.4 42.4 ...
## $ longitude : num [1:3123] -71 -71.1 -71.1 -71.1 -71.1 ...
## $ bathrooms : logi [1:3123] NA NA NA NA NA NA ...
## $ availability_30 : num [1:3123] 3 9 5 0 0 14 0 0 30 0 ...
## $ beds : num [1:3123] 0 1 0 1 1 1 1 0 0 3 ...
## $ bedrooms : num [1:3123] 1 1 NA 1 1 NA NA NA 1 3 ...
## $ review_scores_value : num [1:3123] 4.9 4.77 4.7 4.56 4.43 4.75 NA NA NA 4.47 ...
## $ minimum_nights : num [1:3123] 32 3 3 91 91 29 33 91 91 30 ...
## $ maximum_nights : num [1:3123] 1125 90 730 365 365 ...
## $ availability_365 : num [1:3123] 153 46 62 310 277 329 323 0 365 274 ...
## $ property_type : chr [1:3123] "Entire rental unit" "Entire guest suite" "Entire condominium (condo)" "Entire rental unit" ...
## $ room_type : chr [1:3123] "Entire home/apt" "Entire home/apt" "Entire home/apt" "Entire home/apt" ...
## $ host_since : Date[1:3123], format: "2008-12-03" "2009-02-19" ...
## $ host_is_superhost : logi [1:3123] FALSE TRUE TRUE TRUE TRUE TRUE ...
## $ host_acceptance_rate : chr [1:3123] NA "96%" "96%" "60%" ...
## $ calculated_host_listings_count: num [1:3123] 1 10 10 5 5 11 11 2 1 27 ...
## - attr(*, "spec")=
## .. cols(
## .. id = col_double(),
## .. price = col_character(),
## .. neighbourhood_cleansed = col_character(),
## .. neighbourhood_group_cleansed = col_logical(),
## .. latitude = col_double(),
## .. longitude = col_double(),
## .. bathrooms = col_logical(),
## .. availability_30 = col_double(),
## .. beds = col_double(),
## .. bedrooms = col_double(),
## .. review_scores_value = col_double(),
## .. minimum_nights = col_double(),
## .. maximum_nights = col_double(),
## .. availability_365 = col_double(),
## .. property_type = col_character(),
## .. room_type = col_character(),
## .. host_since = col_date(format = ""),
## .. host_is_superhost = col_logical(),
## .. host_acceptance_rate = col_character(),
## .. calculated_host_listings_count = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# transform price to numeric!
listings$price <- str_remove_all(listings$price, "[$]")
listings$price <- str_remove_all(listings$price, "[,]")
listings$price <- as.numeric(listings$price)
# create df with avg price and sd per neighbourhood
price_by_neighbourhood <- listings %>%
group_by(neighbourhood_cleansed) %>%
summarise(mean_price = round(mean(price), 2),
sd_price = round(sd(price), 2))
# most expensive on avg is leather district
# least expensive on avg is Hyde Park
# removed the 1 outlier Leather District = 3999 as it is way higher than the rest!
# the rest of the prices for the graph are between 0 and 300
ggplot(data = listings, aes(x = neighbourhood_cleansed, y = price)) +
geom_boxplot() +
geom_jitter(width = 0.2) +
ylim(0, 300) +
xlim("Hyde Park", "Leather District") +
ggtitle("Distribution of Prices")
## Warning: Removed 3083 rows containing missing values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3084 rows containing missing values (geom_point).
# load reviews and get an overview of the dataset
reviews <- read_csv("/cloud/project/01_data/boston/reviews.csv")
## Rows: 122879 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (1): listing_id
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(reviews)
## spec_tbl_df [122,879 × 2] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ listing_id: num [1:122879] 3781 3781 3781 3781 3781 ...
## $ date : Date[1:122879], format: "2015-07-10" "2015-08-09" ...
## - attr(*, "spec")=
## .. cols(
## .. listing_id = col_double(),
## .. date = col_date(format = "")
## .. )
## - attr(*, "problems")=<externalptr>
summary(reviews)
## listing_id date
## Min. : 3781 Min. :2009-03-21
## 1st Qu.: 6401859 1st Qu.:2017-09-26
## Median :15256830 Median :2019-01-10
## Mean :17278311 Mean :2018-11-07
## 3rd Qu.:24484501 3rd Qu.:2020-01-04
## Max. :52113003 Max. :2021-09-19
# get number of reviews per id
# change column name of listing_id so that it fits to reviews$id
reviews_by_id <- reviews %>%
group_by(listing_id) %>%
summarise(n_reviews = n()) %>%
rename(id = listing_id)
# avg number of reviews per appartment
mean(reviews_by_id$n_reviews)
## [1] 53.98902
# merge reviews_by_id and listings
listings_reviews <- merge(reviews_by_id, listings, by = "id" )
# reduce listing_reviews for a better overview
listings_reviews <- listings_reviews %>%
select("id", "n_reviews", "neighbourhood_cleansed",
"latitude", "longitude")
review_desc <- listings_reviews %>%
arrange(desc(n_reviews)) %>%
slice_head(n = 200)
ggplot(data = review_desc, aes(x = n_reviews, y = neighbourhood_cleansed)) +
geom_bar(stat = "identity") +
ggtitle("Locations of Most Reviewed Apartments") +
xlab("Number of Reviews") + ylab("") +
labs(subtitle = "neighbourhoods of 200 most reviewed apartments")
width <- max(review_desc$latitude) - min(review_desc$latitude)
height <- max(review_desc$longitude) - min(review_desc$longitude)
boston_border <- c(bottom = min(review_desc$latitude) - 0.1 * height,
top = max(review_desc$latitude) + 0.1 * height,
left = min(review_desc$longitude) - 0.1 * width,
right = max(review_desc$longitude) + 0.1 * width)
boston_map <- get_stamenmap(boston_border)
## Source : http://tile.stamen.com/terrain/10/309/378.png
## Source : http://tile.stamen.com/terrain/10/310/378.png
## Source : http://tile.stamen.com/terrain/10/309/379.png
## Source : http://tile.stamen.com/terrain/10/310/379.png
ggmap(boston_map) +
geom_point(data = review_desc, aes(x = longitude, y = latitude), color="steelblue")+
ggtitle("Most Reviewed Airbnbs in Boston Top 200")
popup_1 <- paste(sep = "<br/>",
paste0("<b>ID: </b>", review_desc$id),
paste0("<b>Type: </b>" ,listings$property_type),
paste0("<b>Price: </b>" ,listings$price, " $ / night"),
paste0("<b>Number of Reviews: </b>", review_desc$n_reviews))
m <- leaflet(review_desc) %>%
addTiles() %>%
addMarkers(lng = ~ longitude,
lat = ~ latitude,
popup = popup_1)
m
ggmap(boston_map) +
geom_density_2d(data = review_desc, aes(x = longitude, y = latitude)) +
stat_density2d(data = review_desc, aes(x = longitude, y = latitude)) +
ggtitle("Heatmap of Airbnb Housings in Boston (Top200)")
# Part 2: Price Prediction Using Statistical Methods (motivated Beginner + Advanced Tracks) ## Split Dataset 70/30
set.seed(35)
listings_df <- as.data.frame(listings)
splitted_data <- sample.split(listings_df, SplitRatio = 0.7)
train_set = listings_df[splitted_data == TRUE, ]
test_set = listings_df[splitted_data == FALSE, ]
# get all numeric columns and create correlation matrix
df_num <- unlist(lapply(listings_df, is.numeric))
dataset_num = listings_df[df_num == TRUE]
cor(dataset_num)
## id price latitude
## id 1.00000000 0.057130136 0.122799989
## price 0.05713014 1.000000000 0.105217060
## latitude 0.12279999 0.105217060 1.000000000
## longitude 0.05592774 0.110615119 0.269210262
## availability_30 0.16828774 0.092200742 -0.003924321
## beds NA NA NA
## bedrooms NA NA NA
## review_scores_value NA NA NA
## minimum_nights -0.13720303 -0.050997202 -0.003551736
## maximum_nights -0.14072180 0.041058353 0.034549541
## availability_365 0.13300036 0.043215733 -0.062785567
## calculated_host_listings_count 0.33076194 -0.004734504 0.140622942
## longitude availability_30 beds bedrooms
## id 0.05592774 0.168287745 NA NA
## price 0.11061512 0.092200742 NA NA
## latitude 0.26921026 -0.003924321 NA NA
## longitude 1.00000000 0.069121342 NA NA
## availability_30 0.06912134 1.000000000 NA NA
## beds NA NA 1 NA
## bedrooms NA NA NA 1
## review_scores_value NA NA NA NA
## minimum_nights -0.08872897 0.135178319 NA NA
## maximum_nights -0.02314254 -0.048840898 NA NA
## availability_365 0.04556522 0.509657608 NA NA
## calculated_host_listings_count 0.03397609 0.047550977 NA NA
## review_scores_value minimum_nights
## id NA -0.137203030
## price NA -0.050997202
## latitude NA -0.003551736
## longitude NA -0.088728970
## availability_30 NA 0.135178319
## beds NA NA
## bedrooms NA NA
## review_scores_value 1 NA
## minimum_nights NA 1.000000000
## maximum_nights NA 0.155430641
## availability_365 NA 0.048285377
## calculated_host_listings_count NA 0.066176347
## maximum_nights availability_365
## id -0.14072180 0.13300036
## price 0.04105835 0.04321573
## latitude 0.03454954 -0.06278557
## longitude -0.02314254 0.04556522
## availability_30 -0.04884090 0.50965761
## beds NA NA
## bedrooms NA NA
## review_scores_value NA NA
## minimum_nights 0.15543064 0.04828538
## maximum_nights 1.00000000 0.03313297
## availability_365 0.03313297 1.00000000
## calculated_host_listings_count 0.10458032 0.21432412
## calculated_host_listings_count
## id 0.330761940
## price -0.004734504
## latitude 0.140622942
## longitude 0.033976092
## availability_30 0.047550977
## beds NA
## bedrooms NA
## review_scores_value NA
## minimum_nights 0.066176347
## maximum_nights 0.104580317
## availability_365 0.214324123
## calculated_host_listings_count 1.000000000
linreg_1 <- lm(price ~ bedrooms, data = train_set)
summary(linreg_1)
##
## Call:
## lm(formula = price ~ bedrooms, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -393.2 -87.6 -42.9 53.1 4857.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.797 9.835 4.351 1.43e-05 ***
## bedrooms 100.100 5.832 17.164 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 216.3 on 1872 degrees of freedom
## (312 observations deleted due to missingness)
## Multiple R-squared: 0.136, Adjusted R-squared: 0.1355
## F-statistic: 294.6 on 1 and 1872 DF, p-value: < 2.2e-16
linreg_2 <- lm(price ~ bedrooms + review_scores_value, data = train_set)
summary(linreg_2)
##
## Call:
## lm(formula = price ~ bedrooms + review_scores_value, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -416.8 -79.4 -36.9 40.5 4865.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 124.767 69.905 1.785 0.0745 .
## bedrooms 108.600 6.838 15.881 <2e-16 ***
## review_scores_value -21.180 14.939 -1.418 0.1565
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 227.4 on 1360 degrees of freedom
## (823 observations deleted due to missingness)
## Multiple R-squared: 0.1569, Adjusted R-squared: 0.1556
## F-statistic: 126.5 on 2 and 1360 DF, p-value: < 2.2e-16
The Second Model shows a higher R-Squared, however with a higher number of variables the R value automatically increases. The R-Value for both models is quite low!
predicted_price <- predict(linreg_2, test_set)
rmse <- function(actual, predicted) {
sqrt(mean((predicted - actual)^2, na.rm = TRUE))
}
rmse(actual = test_set$price, predicted = predicted_price)
## [1] 449.7764
linreg_3 <- lm(price ~ bedrooms + review_scores_value + minimum_nights + property_type, data = train_set)
summary(linreg_3)
##
## Call:
## lm(formula = price ~ bedrooms + review_scores_value + minimum_nights +
## property_type, data = train_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -444.6 -62.3 -16.0 30.8 4727.0
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 229.3042 142.9161 1.604
## bedrooms 75.7715 8.1288 9.321
## review_scores_value -9.4835 14.5200 -0.653
## minimum_nights 0.0678 0.1262 0.537
## property_typeCastle 88.3570 250.8111 0.352
## property_typeEntire bed and breakfast -65.3416 250.8427 -0.260
## property_typeEntire condominium (condo) -111.5095 126.8861 -0.879
## property_typeEntire guest suite -168.0115 134.4691 -1.249
## property_typeEntire guesthouse -156.1280 198.2802 -0.787
## property_typeEntire loft -68.8616 140.2220 -0.491
## property_typeEntire place -87.3662 250.8226 -0.348
## property_typeEntire rental unit -70.6622 125.8716 -0.561
## property_typeEntire residential home -75.0696 128.9773 -0.582
## property_typeEntire serviced apartment 5.2642 131.1196 0.040
## property_typeEntire townhouse 233.4639 134.4102 1.737
## property_typeHouseboat -12.2692 251.1705 -0.049
## property_typePrivate room in bed and breakfast -99.1161 135.4817 -0.732
## property_typePrivate room in bungalow -243.1266 198.3190 -1.226
## property_typePrivate room in condominium (condo) -193.6089 130.0548 -1.489
## property_typePrivate room in guest suite -144.9211 144.8165 -1.001
## property_typePrivate room in guesthouse -213.6075 250.8130 -0.852
## property_typePrivate room in loft -228.6177 177.3787 -1.289
## property_typePrivate room in rental unit -196.2057 126.3434 -1.553
## property_typePrivate room in residential home -186.2897 126.3662 -1.474
## property_typePrivate room in townhouse -135.6940 132.2235 -1.026
## property_typeRoom in bed and breakfast -183.7693 250.8042 -0.733
## property_typeRoom in boutique hotel -42.2039 133.3421 -0.317
## property_typeRoom in hotel 180.5869 138.2224 1.306
## property_typeShared room in townhouse -236.8278 251.0786 -0.943
## Pr(>|t|)
## (Intercept) 0.1088
## bedrooms <2e-16 ***
## review_scores_value 0.5138
## minimum_nights 0.5912
## property_typeCastle 0.7247
## property_typeEntire bed and breakfast 0.7945
## property_typeEntire condominium (condo) 0.3797
## property_typeEntire guest suite 0.2117
## property_typeEntire guesthouse 0.4312
## property_typeEntire loft 0.6234
## property_typeEntire place 0.7277
## property_typeEntire rental unit 0.5746
## property_typeEntire residential home 0.5606
## property_typeEntire serviced apartment 0.9680
## property_typeEntire townhouse 0.0826 .
## property_typeHouseboat 0.9610
## property_typePrivate room in bed and breakfast 0.4646
## property_typePrivate room in bungalow 0.2204
## property_typePrivate room in condominium (condo) 0.1368
## property_typePrivate room in guest suite 0.3171
## property_typePrivate room in guesthouse 0.3946
## property_typePrivate room in loft 0.1977
## property_typePrivate room in rental unit 0.1207
## property_typePrivate room in residential home 0.1407
## property_typePrivate room in townhouse 0.3050
## property_typeRoom in bed and breakfast 0.4639
## property_typeRoom in boutique hotel 0.7517
## property_typeRoom in hotel 0.1916
## property_typeShared room in townhouse 0.3457
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 217.2 on 1334 degrees of freedom
## (823 observations deleted due to missingness)
## Multiple R-squared: 0.2455, Adjusted R-squared: 0.2297
## F-statistic: 15.5 on 28 and 1334 DF, p-value: < 2.2e-16
# Even if the R squared increases the p Value isnt significant for any
# variable except bedrooms!