Dataset
The dataset consists of historical sales data collected over the past two years, including information on:
- Date: Date of Sale
- Promotional_Spending: Amount of Promotional Spending
- Price: Product Price
- Weather_Conditions: Weather Condition
- Sales_Revenue: Total Sales Revenue
# Load required libraries
library(tidyverse)
library(lubridate)
# Set seed for reproducibility
set.seed(123)

# Number of daily observations to simulate
n <- 100

# Simulate the date range: n consecutive days starting at start_date.
# The dates are derived from n (instead of a hand-picked end date) so the
# Date column always has the same length as the other simulated columns.
start_date <- ymd("2022-01-01")
dates <- seq(start_date, by = "day", length.out = n)
end_date <- dates[n] # 2022-04-10 when n is 100

# Simulate predictor variables.
# The order of the random draws is kept fixed so the seeded values stay
# reproducible.
promotional_spending <- runif(n, min = 1000, max = 5000)
price <- rnorm(n, mean = 50, sd = 10)
weather_conditions <- sample(
  c("sunny", "cloudy", "rainy"),
  size = n,
  replace = TRUE
)

# Simulate sales revenue as baseline + linear trend + sine cycle + noise.
# Note: revenue is generated independently of the predictors above.
day_index <- seq_len(n)
sales_trend <- 0.1 * day_index
# Sine wave with amplitude 100 and a period of 365 / 7 (about 52) days
seasonal_pattern <- sin(day_index * 2 * pi / 365 * 7) * 100
sales_noise <- rnorm(n, mean = 0, sd = 100)
sales_revenue <- 1000 + sales_trend + seasonal_pattern + sales_noise

# Create dataframe
simulated_data <- tibble(
  Date = dates,
  Promotional_Spending = promotional_spending,
  Price = price,
  Weather_Conditions = weather_conditions,
  Sales_Revenue = sales_revenue
)

# Display the first few rows of the dataset
head(simulated_data)
1
Develop a regression model to understand the relationship between sales revenue and various predictors such as promotional spending, pricing, and external factors.
# Load required library
library(tidyverse)
# Regress sales revenue on promotional spending, price and weather condition
# using ordinary least squares. Weather_Conditions is a character column, so
# lm() expands it into dummy variables.
model <- lm(
  formula = Sales_Revenue ~ Promotional_Spending + Price + Weather_Conditions,
  data = simulated_data
)

# Print coefficient estimates, standard errors and overall fit statistics
summary(model)
##
## Call:
## lm(formula = Sales_Revenue ~ Promotional_Spending + Price + Weather_Conditions,
## data = simulated_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -340.29 -71.58 11.21 82.72 315.61
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 992.92772 81.16696 12.233 <2e-16 ***
## Promotional_Spending 0.01922 0.01197 1.605 0.112
## Price -0.64144 1.42687 -0.450 0.654
## Weather_Conditionsrainy -22.74556 36.17645 -0.629 0.531
## Weather_Conditionssunny -23.58060 32.57721 -0.724 0.471
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 134.7 on 95 degrees of freedom
## Multiple R-squared: 0.03842, Adjusted R-squared: -0.002062
## F-statistic: 0.9491 on 4 and 95 DF, p-value: 0.4393
Catatan: di sini kita memuat library untuk visualisasi, lalu membuat plot diagnostik residual dari model regresi.
# Load required libraries
library(ggplot2)
library(ggfortify)
library(gridExtra)
library(cowplot)
# Store the in-sample fitted values of the regression next to the data
simulated_data$Predicted_Sales <- fitted(model)

# In-sample residuals of the regression, shared by both diagnostic plots
model_residuals <- residuals(model)

# Diagnostic 1: residuals against fitted values, with a dashed zero line
residual_plot <- ggplot(
  simulated_data,
  aes(x = Predicted_Sales, y = model_residuals)
) +
  geom_point(color = "#8B322C") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "#17B0AE") +
  labs(
    title = "Residuals vs Fitted",
    x = "Fitted Values",
    y = "Residuals"
  ) +
  theme_minimal()

# Diagnostic 2: histogram of the residuals in bins of width 100
residual_distribution <- ggplot(simulated_data, aes(x = model_residuals)) +
  geom_histogram(binwidth = 100, fill = "#436850", color = "#12372A") +
  labs(
    title = "Distribution of Residuals",
    x = "Residuals",
    y = "Frequency"
  ) +
  theme_minimal()

# Stack the two diagnostics vertically
plot_grid(residual_plot, residual_distribution, nrow = 2)
2
Build a time series model to capture the temporal patterns and trends in sales revenue, accounting for seasonality and other time-related effects.
# Load required libraries
library(forecast)
library(gridExtra)
# Convert Date column to Date format if not already (no-op for Date input)
simulated_data$Date <- as.Date(simulated_data$Date)

# Wrap sales revenue in a ts object with a period of 7 observations
# (assumes weekly seasonality).
# NOTE(review): the simulated cycle has a period of about 52 days (365 / 7),
# not 7 days - confirm that frequency = 7 is the intended seasonal period.
sales_ts <- ts(simulated_data$Sales_Revenue, frequency = 7)

# Fit ARIMA model with automatic order selection
arima_model <- auto.arima(sales_ts)

# Actual vs. Fitted plot: overlay the in-sample fitted values on the observed
# series. (Plotting forecast(arima_model) would show future forecasts only,
# not the fitted values the title promises.) The time axis is in units of one
# seasonal period, i.e. weeks, because the ts object carries no calendar dates.
actual_vs_fitted <- autoplot(sales_ts, series = "Actual") +
  autolayer(fitted(arima_model), series = "Fitted") +
  labs(
    title = "Actual vs Fitted",
    x = "Time (weeks)",
    y = "Sales Revenue",
    colour = "Series"
  )

# Residuals plot: residuals in observation order. The ts residuals are
# converted to a plain numeric vector so ggplot can pick a continuous scale.
arima_residuals <- as.numeric(residuals(arima_model))
residuals_df <- data.frame(
  index = seq_along(arima_residuals),
  residuals = arima_residuals
)
residuals_plot <- ggplot(residuals_df, aes(x = index, y = residuals)) +
  geom_point(color = "#496989") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "#561C24") +
  labs(title = "Residuals", x = "Index", y = "Residuals")

# Combine plots
plot_grid(actual_vs_fitted, residuals_plot, nrow = 2)

# Autocorrelation function of the sales series
acf(sales_ts)
Dilihat dari plotnya, datanya tidak bersifat musiman, tetapi bersifat siklus.
3
Evaluate and compare the performance of both models in forecasting future sales revenue.
# Forecast horizon: number of periods to forecast and to hold out for scoring
horizon <- 30

# Forecast for the next `horizon` periods beyond the end of the data.
# No actual values exist for these periods yet, so this forecast cannot be
# scored; accuracy is measured on a holdout set below instead.
arima_forecast <- forecast(arima_model, h = horizon)

# Compute MSE, MAE and MAPE (in percent) for equal-length vectors of actual
# and predicted values. Returns a named numeric vector (mse, mae, mape).
forecast_metrics <- function(actual, predicted) {
  stopifnot(length(actual) == length(predicted))
  errors <- predicted - actual
  c(
    mse = mean(errors^2),
    mae = mean(abs(errors)),
    mape = mean(abs(errors / actual)) * 100
  )
}

# Holdout split: the last `horizon` observations are kept as the test set and
# both models are refitted on the remaining observations, so each forecast is
# compared with actual values from the same periods it predicts.
n_obs <- nrow(simulated_data)
stopifnot(n_obs > horizon)
train_rows <- seq_len(n_obs - horizon)
train_data <- simulated_data[train_rows, ]
test_data <- simulated_data[-train_rows, ]
actual_sales <- test_data$Sales_Revenue

# ARIMA: refit on the training series (same seasonal period as sales_ts) and
# forecast the holdout periods
train_ts <- ts(train_data$Sales_Revenue, frequency = frequency(sales_ts))
arima_holdout_forecast <- forecast(auto.arima(train_ts), h = horizon)
arima_metrics <- forecast_metrics(
  actual_sales,
  as.numeric(arima_holdout_forecast$mean)
)

# Linear regression: refit the same formula on the training rows and predict
# the holdout rows from their observed predictor values
linear_reg_holdout <- lm(formula(model), data = train_data)
linear_reg_pred <- unname(predict(linear_reg_holdout, newdata = test_data))
linear_reg_metrics <- forecast_metrics(actual_sales, linear_reg_pred)

# Calculate evaluation metrics for ARIMA model
arima_mse <- arima_metrics[["mse"]]
arima_mae <- arima_metrics[["mae"]]
arima_mape <- arima_metrics[["mape"]]

# Calculate evaluation metrics for linear regression model
linear_reg_mse <- linear_reg_metrics[["mse"]]
linear_reg_mae <- linear_reg_metrics[["mae"]]
linear_reg_mape <- linear_reg_metrics[["mape"]]

# Print evaluation metrics
# NOTE: re-run this chunk to regenerate the printed values; previously
# recorded output (e.g. a linear regression MSE of 0) came from comparing
# vectors of different lengths and from a last-value placeholder instead of
# the regression model.
cat("Evaluation metrics for ARIMA model:\n")
cat("MSE:", arima_mse, "\n")
cat("MAE:", arima_mae, "\n")
cat("MAPE:", arima_mape, "%\n\n")
cat("Evaluation metrics for Linear Regression model:\n")
cat("MSE:", linear_reg_mse, "\n")
cat("MAE:", linear_reg_mae, "\n")
cat("MAPE:", linear_reg_mape, "%\n")
Note
- Forecast: Kode tersebut menghasilkan peramalan untuk kedua model. Untuk ARIMA, digunakan fungsi forecast().
- Menghitung Metrik Evaluasi:
  - Untuk model ARIMA: Mean Squared Error (MSE), Mean Absolute Error (MAE), dan Mean Absolute Percentage Error (MAPE).
  - Untuk model regresi linear: MSE, MAE, dan MAPE.
  Metrik-metrik ini dihitung dengan membandingkan nilai-nilai yang diramalkan dengan data simulasi aktual (simulated_data$Sales_Revenue).
- Mencetak Metrik Evaluasi: Akhirnya, kode tersebut mencetak metrik evaluasi untuk kedua model menggunakan fungsi cat().