pacman::p_load(pacman, tidyverse, gridExtra)
#' Plot historical quarterly sales with a linear-trend forecast
#'
#' Reads a CSV whose `Year` column holds labels such as "2015 Q1", converts
#' each label to the first day of that quarter, fits a linear model of
#' `currency_col` on `Date`, and draws the observed series together with the
#' model's predictions for future quarters.
#'
#' @param file_path Path to the CSV file (a single string). The file name
#'   without directory and extension is used as the start of the plot title.
#' @param currency_col Name (a single string) of the numeric sales column,
#'   e.g. "Billion USD". Also used as the y-axis label.
#' @param forecast_start First forecast date; anything `as.Date()` accepts.
#'   Defaults to 2024-01-01.
#' @param n_quarters Number of quarterly forecast points. Defaults to 40
#'   (10 years).
#' @return A ggplot object: a blue line for the historical data and a dashed
#'   red line for the forecast.
process_and_predict <- function(file_path, currency_col,
                                forecast_start = as.Date("2024-01-01"),
                                n_quarters = 40L) {
  stopifnot(
    is.character(file_path), length(file_path) == 1L,
    is.character(currency_col), length(currency_col) == 1L,
    is.numeric(n_quarters), length(n_quarters) == 1L, n_quarters >= 1
  )
  forecast_start <- as.Date(forecast_start)

  # Read data, forcing the period label to text and the sales column to double
  sales <- read_csv(
    file_path,
    col_types = cols(Year = col_character(), !!currency_col := col_double())
  )

  # Split "YYYY Qn" into year and quarter, then map each quarter to the month
  # in which it starts so a proper Date can be built.
  sales <- sales %>%
    separate(Year, into = c("Year", "Quarter"), sep = " ") %>%
    mutate(
      Quarter = recode(Quarter, Q1 = "01", Q2 = "04", Q3 = "07", Q4 = "10"),
      # An explicit format yields NA for unparsable labels instead of a
      # cryptic error that depends on which row happens to come first.
      Date = as.Date(
        paste(Year, Quarter, "01", sep = "-"),
        format = "%Y-%m-%d"
      )
    ) %>%
    select(Date, all_of(currency_col))

  if (anyNA(sales$Date)) {
    warning(
      "Some period labels in '", file_path,
      "' could not be parsed as 'YYYY Qn'; their dates are NA.",
      call. = FALSE
    )
  }

  # Fit a linear regression of sales on date. as.name() keeps column names
  # containing spaces intact when the formula is built.
  model <- lm(
    reformulate(termlabels = "Date", response = as.name(currency_col)),
    data = sales
  )

  # Quarterly forecast grid and the model's predictions on it
  future_dates <- seq(forecast_start, by = "quarter", length.out = n_quarters)
  future_data <- data.frame(Date = future_dates)
  future_data[[currency_col]] <- predict(model, newdata = future_data)

  # Combine historical data with predictions, tagging each row's origin
  combined_data <- bind_rows(
    sales %>% mutate(Source = "Historical"),
    future_data %>% mutate(Source = "Forecast")
  )

  # basename() + file_path_sans_ext() give a clean label even when file_path
  # contains directories or starts with "./".
  series_label <- tools::file_path_sans_ext(basename(file_path))

  # Line chart: historical data solid, forecast dashed
  ggplot(
    combined_data,
    aes(x = Date, y = .data[[currency_col]], color = Source)
  ) +
    geom_line(data = combined_data %>% filter(Source == "Historical")) +
    geom_line(
      data = combined_data %>% filter(Source == "Forecast"),
      linetype = "dashed"
    ) +
    scale_color_manual(values = c("Historical" = "blue", "Forecast" = "red")) +
    labs(
      title = paste(
        series_label,
        "Sales Revenue Over Time with Linear Model Predictions"
      ),
      x = "Year",
      y = currency_col
    ) +
    theme_minimal()
}
# Build one forecast chart per company data file.
# NOTE(review): "Billion CYD" must match the column header in Alibaba.csv
# exactly -- confirm it is not a typo for another currency code.
amazon_plot <- process_and_predict(
  file_path = "Amazon.csv",
  currency_col = "Billion USD"
)
alibaba_plot <- process_and_predict(
  file_path = "Alibaba.csv",
  currency_col = "Billion CYD"
)
ebay_plot <- process_and_predict(
  file_path = "eBay.csv",
  currency_col = "Billion USD"
)

# Stack the three charts vertically in a single column.
grid.arrange(grobs = list(amazon_plot, alibaba_plot, ebay_plot), ncol = 1)
