# Create a data frame
travel_data <- data.frame(
Agency = c("TravelOne - Flight", "TravelOne - Hotel",
"Google Travel - Flight", "Google Travel - Hotel",
"Booking.com - Flight", "Booking.com - Hotel"),
Winter = c(350, 500, 340, 480, 360, 490),
Spring = c(400, 550, 410, 530, 415, 540),
Summer = c(525, 700, 510, 690, 520, 710),
Fall = c(375, 600, 365, 580, 370, 590)
)
# Print dataset
print(travel_data)
## Agency Winter Spring Summer Fall
## 1 TravelOne - Flight 350 400 525 375
## 2 TravelOne - Hotel 500 550 700 600
## 3 Google Travel - Flight 340 410 510 365
## 4 Google Travel - Hotel 480 530 690 580
## 5 Booking.com - Flight 360 415 520 370
## 6 Booking.com - Hotel 490 540 710 590
# Export to CSV
write.csv(travel_data, "travel_data.csv", row.names = FALSE)
I will read my csv file and then convert the dataset to a long format so that each row contains:Agency,Type, Season and value.
library(tidyr)
# read the CSV file
travel_data <- read.csv("travel_data.csv")
# Reshape to long format
travel_data_long <- travel_data %>%
pivot_longer(
cols = Winter:Fall,
names_to = "Season",
values_to = "Value"
) %>%
separate(Agency, into = c("Agency", "Type"), sep = " - ")
# View final long dataset format
print(travel_data_long)
## # A tibble: 24 × 4
## Agency Type Season Value
## <chr> <chr> <chr> <int>
## 1 TravelOne Flight Winter 350
## 2 TravelOne Flight Spring 400
## 3 TravelOne Flight Summer 525
## 4 TravelOne Flight Fall 375
## 5 TravelOne Hotel Winter 500
## 6 TravelOne Hotel Spring 550
## 7 TravelOne Hotel Summer 700
## 8 TravelOne Hotel Fall 600
## 9 Google Travel Flight Winter 340
## 10 Google Travel Flight Spring 410
## # ℹ 14 more rows
I will try to calculate the flight and hotels prices by agency throughout all four seasons.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Avg_Flight_Price <- travel_data_long %>%
filter(Type == "Flight") %>%
group_by(Agency, Season) %>%
summarise(Average_Flight_Price = mean(Value)) %>%
arrange(Average_Flight_Price )
## `summarise()` has grouped output by 'Agency'. You can override using the
## `.groups` argument.
Avg_Flight_Price
Avg_Hotel_Price <- travel_data_long %>%
filter(Type == "Hotel") %>%
group_by(Agency, Season) %>%
summarise(Average_Hotel_Price = mean(Value)) %>%
arrange(Average_Hotel_Price )
## `summarise()` has grouped output by 'Agency'. You can override using the
## `.groups` argument.
Avg_Hotel_Price
library(ggplot2)
# Create a data table including both Average flight price and Average Hotel price
AvgFlight_Vs_AvgHotel <- data.frame(Avg_Flight_Price[,3], Avg_Hotel_Price[,3])
AvgFlight_Vs_AvgHotel
# let's graph a scatter plot that shows the correlation between the hotel and flights prices evolution.
ggplot(AvgFlight_Vs_AvgHotel, aes(x = Average_Flight_Price, y = Average_Hotel_Price)) +
geom_point(size = 3, color = "navyblue") +
geom_smooth(method = "lm", se = FALSE, color = "green", linetype = "dashed") +
labs(
title = "Correlation Between Flight and Hotel Prices",
x = "Average Flight Price ($)",
y = "Average Hotel Price ($)"
) +
theme_minimal(base_size = 14)
## `geom_smooth()` using formula = 'y ~ x'
This plot shows how the flights and hotels prices move together. Moreover, the line slopes is upward that means both prices have a positive relationship more specifically, as flights increase, so do hotels).