pacman::p_load(pacman, tidyverse, gridExtra)

#' Plot historical quarterly sales together with a linear-trend forecast
#'
#' Reads a CSV whose `Year` column holds labels of the form "YYYY Qn"
#' (e.g. "2020 Q1") and whose `currency_col` column holds the sales figures,
#' fits a straight line of sales against date with `lm()`, extrapolates that
#' line past the last observed quarter, and returns a line chart showing both
#' the historical series and the forecast.
#'
#' @param file_path Path to the CSV file (length-one character). The file name
#'   without directory and extension is used as the company name in the title.
#' @param currency_col Name of the sales column in the CSV (length-one
#'   character). It is also used as the y-axis label.
#' @param n_quarters Number of quarters to forecast after the last observed
#'   quarter. Defaults to 40 (ten years).
#' @return A ggplot object: a solid line for the historical data and a dashed
#'   line for the forecast.
process_and_predict <- function(file_path, currency_col, n_quarters = 40) {
  stopifnot(
    is.character(file_path), length(file_path) == 1L,
    is.character(currency_col), length(currency_col) == 1L,
    is.numeric(n_quarters), length(n_quarters) == 1L, n_quarters >= 1
  )

  # Read data, specifying column types; `currency_col` is injected as the
  # name of the numeric column.
  data <- read_csv(file_path, col_types = cols(
    Year = col_character(),
    !!currency_col := col_double()
  ))

  # First month of each quarter, used to turn "YYYY Qn" into a real date.
  quarter_start_month <- c(Q1 = "01", Q2 = "04", Q3 = "07", Q4 = "10")

  # Process data: separate year and quarter, create date, filter columns.
  # An explicit date format makes malformed labels become NA instead of
  # raising an error that depends on which row happens to be malformed.
  data <- data %>%
    separate(Year, into = c("Year", "Quarter"), sep = " ") %>%
    mutate(
      Date = as.Date(
        paste(Year, unname(quarter_start_month[Quarter]), "01", sep = "-"),
        format = "%Y-%m-%d"
      )
    ) %>%
    select(Date, all_of(currency_col))

  # Report and drop rows whose period label could not be parsed, rather than
  # letting them disappear silently inside lm() and ggplot.
  unparsed <- is.na(data$Date)
  if (any(unparsed)) {
    warning(
      sum(unparsed), " row(s) in '", file_path,
      "' have a `Year` value that is not of the form \"YYYY Qn\" ",
      "and were dropped.",
      call. = FALSE
    )
    data <- data[!unparsed, ]
  }
  if (nrow(data) < 2L) {
    stop(
      "Need at least two dated observations in '", file_path,
      "' to fit a trend line.",
      call. = FALSE
    )
  }

  # Fit a linear regression of the sales column on the date.
  # as.name() keeps column names containing spaces valid inside the formula.
  model <- lm(
    reformulate(termlabels = "Date", response = as.name(currency_col)),
    data = data
  )

  # Forecast dates start one quarter after the last observed date, so the
  # forecast never overlaps the history or leaves a gap after it.
  last_observed <- max(data$Date)
  future_dates <- seq(
    last_observed,
    by = "quarter",
    length.out = n_quarters + 1
  )[-1]

  # Create a data frame for future dates and predict future sales
  future_data <- data.frame(Date = future_dates)
  future_data[[currency_col]] <- predict(model, newdata = future_data)

  # Combine historical data with predictions
  combined_data <- bind_rows(
    data %>% mutate(Source = "Historical"),
    future_data %>% mutate(Source = "Forecast")
  )

  # Company name for the title: file name without directory or extension,
  # so "./Amazon.csv" and "data/Amazon.csv" both give "Amazon".
  company <- tools::file_path_sans_ext(basename(file_path))

  # Line chart: solid line for history, dashed line for the forecast
  ggplot(
    combined_data,
    aes(x = Date, y = .data[[currency_col]], color = Source)
  ) +
    geom_line(data = combined_data %>% filter(Source == "Historical")) +
    geom_line(
      data = combined_data %>% filter(Source == "Forecast"),
      linetype = "dashed"
    ) +
    scale_color_manual(values = c("Historical" = "blue", "Forecast" = "red")) +
    labs(
      title = paste(
        company,
        "Sales Revenue Over Time with Linear Model Predictions"
      ),
      x = "Year",
      y = currency_col
    ) +
    theme_minimal()
}

# Build one forecast chart per company. Each call reads the named CSV and
# models the named sales column.
amazon_plot <- process_and_predict(
  file_path = "Amazon.csv",
  currency_col = "Billion USD"
)
# NOTE(review): "CYD" may be a typo for "CNY"; the string has to match the
# CSV header exactly -- confirm against Alibaba.csv before changing it.
alibaba_plot <- process_and_predict(
  file_path = "Alibaba.csv",
  currency_col = "Billion CYD"
)
ebay_plot <- process_and_predict(
  file_path = "eBay.csv",
  currency_col = "Billion USD"
)

# Stack the three charts in a single column
grid.arrange(grobs = list(amazon_plot, alibaba_plot, ebay_plot), ncol = 1)