Create the CSV file

# Create a data frame
travel_data <- data.frame(
  Agency = c("TravelOne - Flight", "TravelOne - Hotel",
             "Google Travel - Flight", "Google Travel - Hotel",
             "Booking.com - Flight", "Booking.com - Hotel"),
  Winter = c(350, 500, 340, 480, 360, 490),
  Spring = c(400, 550, 410, 530, 415, 540),
  Summer = c(525, 700, 510, 690, 520, 710),
  Fall = c(375, 600, 365, 580, 370, 590)
)

# Print dataset
print(travel_data)
##                   Agency Winter Spring Summer Fall
## 1     TravelOne - Flight    350    400    525  375
## 2      TravelOne - Hotel    500    550    700  600
## 3 Google Travel - Flight    340    410    510  365
## 4  Google Travel - Hotel    480    530    690  580
## 5   Booking.com - Flight    360    415    520  370
## 6    Booking.com - Hotel    490    540    710  590
# Export to CSV
write.csv(travel_data, "travel_data.csv", row.names = FALSE)

Let’s read the CSV file and transform it into a long format.

I will read my csv file and then convert the dataset to a long format so that each row contains:Agency,Type, Season and value.

library(tidyr)

# read the CSV file

travel_data <- read.csv("travel_data.csv")

# Reshape to long format

travel_data_long <- travel_data %>%
  pivot_longer(
    cols = Winter:Fall,
    names_to = "Season",
    values_to = "Value"
  ) %>%
  separate(Agency, into = c("Agency", "Type"), sep = " - ")

# View final long dataset format

print(travel_data_long)
## # A tibble: 24 × 4
##    Agency        Type   Season Value
##    <chr>         <chr>  <chr>  <int>
##  1 TravelOne     Flight Winter   350
##  2 TravelOne     Flight Spring   400
##  3 TravelOne     Flight Summer   525
##  4 TravelOne     Flight Fall     375
##  5 TravelOne     Hotel  Winter   500
##  6 TravelOne     Hotel  Spring   550
##  7 TravelOne     Hotel  Summer   700
##  8 TravelOne     Hotel  Fall     600
##  9 Google Travel Flight Winter   340
## 10 Google Travel Flight Spring   410
## # ℹ 14 more rows

Let’s make a comparative analysis between agencies.

I will try to calculate the flight and hotels prices by agency throughout all four seasons.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Avg_Flight_Price <- travel_data_long %>%
  filter(Type == "Flight") %>%
  group_by(Agency, Season) %>%
  summarise(Average_Flight_Price = mean(Value)) %>%
  arrange(Average_Flight_Price )
## `summarise()` has grouped output by 'Agency'. You can override using the
## `.groups` argument.
Avg_Flight_Price
Avg_Hotel_Price <- travel_data_long %>%
  filter(Type == "Hotel") %>%
  group_by(Agency, Season) %>%
  summarise(Average_Hotel_Price = mean(Value)) %>%
  arrange(Average_Hotel_Price )
## `summarise()` has grouped output by 'Agency'. You can override using the
## `.groups` argument.
Avg_Hotel_Price

Let’s evaluate the relationship between the average flight prices and average hotel price across all seasons.

library(ggplot2)

# Create a data table including both Average flight price and Average Hotel price

AvgFlight_Vs_AvgHotel <- data.frame(Avg_Flight_Price[,3], Avg_Hotel_Price[,3]) 
AvgFlight_Vs_AvgHotel
# let's graph a scatter plot that shows the correlation between the hotel and flights prices evolution.  

ggplot(AvgFlight_Vs_AvgHotel, aes(x = Average_Flight_Price, y = Average_Hotel_Price)) +
  geom_point(size = 3, color = "navyblue") +
  geom_smooth(method = "lm", se = FALSE, color = "green", linetype = "dashed") +
  labs(
    title = "Correlation Between Flight and Hotel Prices",
    x = "Average Flight Price ($)",
    y = "Average Hotel Price ($)"
  ) +
  theme_minimal(base_size = 14)
## `geom_smooth()` using formula = 'y ~ x'

INTERPRETATION

This plot shows how the flights and hotels prices move together. Moreover, the line slopes is upward that means both prices have a positive relationship more specifically, as flights increase, so do hotels).