# Step 1: show the current working directory
getwd()
## [1] "/cloud/project"
# Step 2: list the contents of the project directory
list.files(path = "/cloud/project")
## [1] "AirBnB Analysis Report.Rmd" "AirBnB-Analysis-Report.Rmd"
## [3] "analysis"                   "data"                      
## [5] "project.Rproj"
# Step 3: list the contents of the data directory
list.files(path = "/cloud/project/data")
## [1] "Athens_Airbnb_Data3.csv"
# Step 4: read the listings CSV into a data frame and preview its first rows
airbnb <- read.csv(file = "/cloud/project/data/Athens_Airbnb_Data3.csv")
head(airbnb, n = 6L)
##      id                                           name host_id latitude
## 1 10595            96m2, 3BR, 2BA, Metro, WI-FI etc...   37177 37.98863
## 2 10990   Athens Quality Apartments - Deluxe Apartment   37177 37.98903
## 3 10993             Athens Quality Apartments - Studio   37177 37.98888
## 4 10995 AQA-No2 1-bedroom, smart tv, fiber connection,   37177 37.98903
## 5 27262             54m2, 1-br, cable tv, wi-fi, metro   37177 37.98924
## 6 28186           ❤️Deluxe central loft near Acropolis❤️  121318 37.97545
##   longitude       room_type price minimum_nights number_of_reviews
## 1  23.76527 Entire home/apt    70              1                32
## 2  23.76448 Entire home/apt    50              1                54
## 3  23.76473 Entire home/apt    38              1                76
## 4  23.76448 Entire home/apt    48              1                27
## 5  23.76500 Entire home/apt    52              1                17
## 6  23.72892 Entire home/apt    54              2               466
##   calculated_host_listings_count availability_365 number_of_reviews_ltm
## 1                              6              114                     7
## 2                              6              364                    10
## 3                              6              312                    22
## 4                              6              236                     4
## 5                              6              176                     0
## 6                              2              358                    11
# Package setup ----
# Install only the packages that are not already available, instead of
# re-installing on every run. The plotting package is named "ggplot2";
# there is no CRAN package called "ggplot", so asking for it only produced
# a "package 'ggplot' is not available" warning.
required_packages <- c("tidyverse", "dplyr", "ggplot2")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach the packages used by the analysis below. dplyr and ggplot2 are
# already attached by tidyverse; the explicit calls are harmless and make
# the direct dependencies visible.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
# Question: what is the average price of listings by room type?

# Step 1: compute the mean price within each room type, ignoring rows with
# a missing price. Despite the "host" in the object name, the grouping
# variable is room_type.
average_price_host <- summarise(
  group_by(airbnb, room_type),
  avg_price = mean(price, na.rm = TRUE),
  .groups = "drop"
)

# Step 2: print the summary table
print(average_price_host)
## # A tibble: 4 × 2
##   room_type       avg_price
##   <chr>               <dbl>
## 1 Entire home/apt      84.2
## 2 Hotel room          189. 
## 3 Private room        100. 
## 4 Shared room          61.9
# Step 3: draw one bar per room type
library(ggplot2)
ggplot(average_price_host, aes(x = room_type, y = avg_price)) +
  labs(title = "Average Price by Room Type") +
  geom_col(fill = "lightblue", color = "black")

# Question: is there a correlation between price and availability_365?

# Step 1: correlation coefficient between the two columns, computed only on
# rows where both values are present (cor() defaults to Pearson's method)
price_correlation <- with(
  airbnb,
  cor(price, availability_365, use = "complete.obs")
)

# Step 2: print the coefficient
print(price_correlation)
## [1] 0.01251695
# Question: what is the distribution of minimum nights across listings?

# Histogram of minimum_nights with one bin per night
ggplot(data = airbnb, mapping = aes(x = minimum_nights)) +
  geom_histogram(color = "black", fill = "skyblue", binwidth = 1) +
  theme_minimal() +
  labs(
    y = "Number of Listings",
    x = "Minimum Nights",
    title = "Distribution of Minimum Nights Required"
  )

# Question: how does room type affect the average number of reviews?

# Step 1: summarise last-twelve-month reviews (number_of_reviews_ltm) per
# room type. A plain total grows with the number of listings in a room type,
# so the per-listing mean (avg_reviews) is what answers the question.
# review_total is kept as the second column, as before, and the listing
# count is added for context.
room_review <- airbnb %>%
  group_by(room_type) %>%
  summarise(
    review_total = sum(number_of_reviews_ltm, na.rm = TRUE),
    listings = n(),
    avg_reviews = mean(number_of_reviews_ltm, na.rm = TRUE),
    .groups = "drop"
  )

# Step 2: print results
print(room_review)
## (printed table not shown: re-run to regenerate it; it now contains the
## columns room_type, review_total, listings and avg_reviews)

# Step 3: visualize the average number of reviews per listing
ggplot(
  data = room_review,
  mapping = aes(x = room_type, y = avg_reviews, fill = room_type)
) +
  geom_col(color = "black") +
  labs(
    title = "Average reviews per listing by room type",
    y = "Average reviews (last 12 months)"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Question: which listings have the highest number of reviews in the last
# twelve months?

# Step 1: load packages
library(dplyr)

# Step 2: total the last-twelve-month review count per listing and keep the
# five largest.
# - number_of_reviews_ltm is used because the question is about the last
#   twelve months; number_of_reviews appears to be the all-time count
#   (confirm against the data dictionary).
# - Grouping by id as well as name keeps distinct listings that happen to
#   share the same name from being added together.
busiest_listings <- airbnb %>%
  group_by(id, name) %>%
  summarise(
    total_reviews = sum(number_of_reviews_ltm, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_reviews)) %>%
  slice_head(n = 5)

# Step 3: print results
print(busiest_listings)
## (printed table not shown: re-run to regenerate it; it now contains the
## columns id, name and total_reviews, based on number_of_reviews_ltm)

# Step 4: visualize results, one bar per listing name
ggplot(
  data = busiest_listings,
  mapping = aes(x = name, y = total_reviews, fill = name)
) +
  geom_col(color = "black") +
  labs(title = "Listings with highest reviews")

# Question: what is the monthly review rate of the most-reviewed listings?
library(dplyr)

# Step 1: total the last-twelve-month reviews per listing, then divide by 12
# to get reviews per month, and keep the ten most-reviewed listings.
# The rate must be built from number_of_reviews_ltm: dividing the
# number_of_reviews column (which appears to be an all-time count) by 12
# overstates the monthly rate. Grouping by id as well as name keeps
# different listings with the same name separate.
rate_of_reviews <- airbnb %>%
  group_by(id, name) %>%
  summarise(
    review_total = sum(number_of_reviews_ltm, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(review_rate = review_total / 12) %>%
  arrange(desc(review_total)) %>%
  slice_head(n = 10)

# Step 2: print results
print(rate_of_reviews)
## (printed table not shown: re-run to regenerate it; it now contains the
## columns id, name, review_total and review_rate, based on
## number_of_reviews_ltm)