# Step 1: confirm the working directory (expected to be the project root)
getwd()
## [1] "/cloud/project"
# Step 2: list the contents of the project root (the working directory)
list.files()
## [1] "AirBnB Analysis Report.Rmd" "AirBnB-Analysis-Report.Rmd"
## [3] "analysis" "data"
## [5] "project.Rproj"
# Step 3: list the files in the data folder
list.files("data")
## [1] "Athens_Airbnb_Data3.csv"
# Step 4: load the dataset and preview its first rows.
# Paths are relative to the project root so the script is not tied to one
# machine's absolute "/cloud/project" location.
airbnb <- read.csv(file.path("data", "Athens_Airbnb_Data3.csv"))
head(airbnb)
## id name host_id latitude
## 1 10595 96m2, 3BR, 2BA, Metro, WI-FI etc... 37177 37.98863
## 2 10990 Athens Quality Apartments - Deluxe Apartment 37177 37.98903
## 3 10993 Athens Quality Apartments - Studio 37177 37.98888
## 4 10995 AQA-No2 1-bedroom, smart tv, fiber connection, 37177 37.98903
## 5 27262 54m2, 1-br, cable tv, wi-fi, metro 37177 37.98924
## 6 28186 ❤️Deluxe central loft near Acropolis❤️ 121318 37.97545
## longitude room_type price minimum_nights number_of_reviews
## 1 23.76527 Entire home/apt 70 1 32
## 2 23.76448 Entire home/apt 50 1 54
## 3 23.76473 Entire home/apt 38 1 76
## 4 23.76448 Entire home/apt 48 1 27
## 5 23.76500 Entire home/apt 52 1 17
## 6 23.72892 Entire home/apt 54 2 466
## calculated_host_listings_count availability_365 number_of_reviews_ltm
## 1 6 114 7
## 2 6 364 10
## 3 6 312 22
## 4 6 236 4
## 5 6 176 0
## 6 2 358 11
# Load packages ----
# Install the tidyverse only when it is missing, so the script does not
# reinstall packages on every run. dplyr and ggplot2 are core tidyverse
# packages (see the attach message below), so they need no separate install.
# Note: the plotting package is named "ggplot2"; install.packages("ggplot")
# fails with "package 'ggplot' is not available".
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# dplyr and ggplot2 are already attached by library(tidyverse); the explicit
# calls below are kept only to document the script's direct dependencies.
library(dplyr)
library(ggplot2)
# Question: what is the average listing price for each room type?
# Step 1: group the listings by room type and take the mean price per group.
# Missing prices are ignored and the result is returned ungrouped.
# (Despite its name, average_price_host is keyed by room_type, not by host.)
average_price_host <- summarise(
  group_by(airbnb, room_type),
  avg_price = mean(price, na.rm = TRUE),
  .groups = "drop"
)
# Step 2: show the summary table
print(average_price_host)
## # A tibble: 4 × 2
## room_type avg_price
## <chr> <dbl>
## 1 Entire home/apt 84.2
## 2 Hotel room 189.
## 3 Private room 100.
## 4 Shared room 61.9
# Step 3: bar chart of the average price per room type
library(ggplot2)
ggplot(average_price_host, aes(x = room_type, y = avg_price)) +
  geom_col(fill = "lightblue", color = "black") +
  ggtitle("Average Price by Room Type")

# Question: is price correlated with availability_365?
# Step 1: correlation coefficient between the two columns, computed only on
# rows where both values are present (use = "complete.obs")
price_correlation <- with(
  airbnb,
  cor(price, availability_365, use = "complete.obs")
)
# Step 2: show the coefficient
print(price_correlation)
## [1] 0.01251695
# Question: how are minimum-night requirements distributed across listings?
# Histogram of minimum_nights with one bin per night (binwidth = 1)
ggplot(data = airbnb, mapping = aes(x = minimum_nights)) +
  geom_histogram(binwidth = 1, color = "black", fill = "skyblue") +
  ggtitle("Distribution of Minimum Nights Required") +
  xlab("Minimum Nights") +
  ylab("Number of Listings") +
  theme_minimal()

# Question: how does the total number of reviews differ by room type?
# Step 1: add up number_of_reviews_ltm within each room type, ignoring NAs.
# Note: this is a per-room-type total, not an average per listing.
# (number_of_reviews_ltm is presumably the last-twelve-months review count.)
room_review <- summarise(
  group_by(airbnb, room_type),
  review_total = sum(number_of_reviews_ltm, na.rm = TRUE),
  .groups = "drop"
)
# Step 2: show the totals
print(room_review)
## # A tibble: 4 × 2
## room_type review_total
## <chr> <int>
## 1 Entire home/apt 89870
## 2 Hotel room 553
## 3 Private room 3883
## 4 Shared room 215
# Step 3: bar chart of total reviews per room type, filled by room type;
# x-axis labels are rotated 45 degrees and right-aligned
ggplot(room_review, aes(x = room_type, y = review_total, fill = room_type)) +
  geom_col(color = "black") +
  ggtitle("Total reviews by room type") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Which listings have the highest number of reviews in the last twelve months?
# Step 1: total the reviews per listing and keep the top five.
# - Uses number_of_reviews_ltm (presumably the last-twelve-months count) so
#   the result matches the question; number_of_reviews is a separate column
#   and does not answer it.
# - Groups by id as well as name, so that different listings that happen to
#   share the same title are not merged into one row.
busiest_listings <- airbnb %>%
  group_by(id, name) %>%
  summarise(
    total_reviews = sum(number_of_reviews_ltm, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_reviews)) %>%
  slice_head(n = 5)
# Step 2: print results
print(busiest_listings)
## (Output not shown: the earlier printout was based on number_of_reviews;
## re-run the script to regenerate it.)
# Step 3: bar chart of the five most-reviewed listings, one colour per listing
ggplot(
  data = busiest_listings,
  mapping = aes(x = name, y = total_reviews, fill = name)
) +
  geom_col(color = "black") +
  labs(title = "Listings with highest reviews")

# What is the monthly review rate for the most-reviewed listings?
# Step 1: total the reviews per listing, then divide by 12 to get the average
# number of reviews per month, and keep the ten most-reviewed listings.
# - The division by 12 only yields a monthly rate when the total covers a
#   twelve-month window, so number_of_reviews_ltm (presumably the
#   last-twelve-months count) is used rather than number_of_reviews.
# - Groups by id as well as name, so that different listings that happen to
#   share the same title are not merged into one row.
rate_of_reviews <- airbnb %>%
  group_by(id, name) %>%
  summarise(
    review_total = sum(number_of_reviews_ltm, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(review_rate = review_total / 12) %>%
  arrange(desc(review_total)) %>%
  slice_head(n = 10)
# Step 2: print results
print(rate_of_reviews)
## (Output not shown: the earlier printout was based on number_of_reviews;
## re-run the script to regenerate it.)