Tugas 12-Ekonometrika

Dhela Agatha

May 06, 2024

Dataset

The dataset consists of historical sales data collected over the past two years, including information on:

  • Date: Date of Sale
  • Promotional_Spending: Amount of Promotional Spending
  • Price: Product Price
  • Weather_Conditions: Weather Condition
  • Sales_Revenue: Total Sales Revenue
# Load required libraries
library(tidyverse)
library(lubridate)

# Set seed for reproducibility
set.seed(123)

# Number of observations (one per calendar day)
n <- 100

# Simulate date range: n consecutive days starting at start_date.
# The sequence is derived from n so that the dates always have the same
# length as the other simulated vectors; with n = 100 the last day is
# 2022-04-10.
start_date <- ymd("2022-01-01")
dates <- seq(start_date, by = "day", length.out = n)
end_date <- dates[n]

# Simulate predictor variables.
# The order of the random draws below is kept fixed so that the seed keeps
# producing the same data.
promotional_spending <- runif(n, min = 1000, max = 5000)
price <- rnorm(n, mean = 50, sd = 10)
weather_conditions <- sample(
  c("sunny", "cloudy", "rainy"),
  size = n,
  replace = TRUE
)

# Simulate sales revenue as base level + linear trend + sine wave + noise.
# The sine term has a period of 365 / 7 (about 52) observations.
# Note that none of the predictors above enter this formula.
day_index <- seq_len(n)
sales_trend <- 0.1 * day_index
seasonal_pattern <- sin(day_index * 2 * pi / 365 * 7) * 100
sales_noise <- rnorm(n, mean = 0, sd = 100)
sales_revenue <- 1000 + sales_trend + seasonal_pattern + sales_noise

# Create dataframe
simulated_data <- tibble(
  Date = dates,
  Promotional_Spending = promotional_spending,
  Price = price,
  Weather_Conditions = weather_conditions,
  Sales_Revenue = sales_revenue
)

# Display the first few rows of the dataset
head(simulated_data)

1

Develop a regression model to understand the relationship between sales revenue and various predictors such as promotional spending, pricing, and external factors.

# Load required library (lm() and summary() themselves come from base R's
# stats/base packages)
library(tidyverse)

# Fit linear regression model: ordinary least squares of Sales_Revenue on
# Promotional_Spending, Price and Weather_Conditions.
# Weather_Conditions holds the strings "sunny", "cloudy" and "rainy", so it
# enters as a categorical predictor; the summary reports coefficients for
# "rainy" and "sunny", leaving "cloudy" as the reference level.
model <- lm(Sales_Revenue ~ Promotional_Spending + Price + Weather_Conditions, data = simulated_data)

# Summarize the model: coefficient table, residual quantiles, R-squared and
# the overall F-test
summary(model)
## 
## Call:
## lm(formula = Sales_Revenue ~ Promotional_Spending + Price + Weather_Conditions, 
##     data = simulated_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -340.29  -71.58   11.21   82.72  315.61 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             992.92772   81.16696  12.233   <2e-16 ***
## Promotional_Spending      0.01922    0.01197   1.605    0.112    
## Price                    -0.64144    1.42687  -0.450    0.654    
## Weather_Conditionsrainy -22.74556   36.17645  -0.629    0.531    
## Weather_Conditionssunny -23.58060   32.57721  -0.724    0.471    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 134.7 on 95 degrees of freedom
## Multiple R-squared:  0.03842,    Adjusted R-squared:  -0.002062 
## F-statistic: 0.9491 on 4 and 95 DF,  p-value: 0.4393

Catatan: di sini kita akan memuat datanya.

# Packages used for the regression diagnostics
library(ggplot2)
library(ggfortify)
library(gridExtra)
library(cowplot)

# Keep the in-sample predictions of the regression alongside the data
simulated_data$Predicted_Sales <- predict(model)

# Panel 1: residuals against fitted values, with a dashed zero reference line
residual_plot <- ggplot(
  data = simulated_data,
  mapping = aes(x = Predicted_Sales, y = resid(model))
)
residual_plot <- residual_plot +
  geom_point(colour = "#8B322C") +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "#17B0AE") +
  ggtitle("Residuals vs Fitted") +
  xlab("Fitted Values") +
  ylab("Residuals") +
  theme_minimal()

# Panel 2: histogram of the residuals in bins of width 100
residual_distribution <- ggplot(
  data = simulated_data,
  mapping = aes(x = resid(model))
)
residual_distribution <- residual_distribution +
  geom_histogram(binwidth = 100, fill = "#436850", colour = "#12372A") +
  ggtitle("Distribution of Residuals") +
  xlab("Residuals") +
  ylab("Frequency") +
  theme_minimal()

# Stack the two panels vertically
plot_grid(plotlist = list(residual_plot, residual_distribution), nrow = 2)

2

Build a time series model to capture the temporal patterns and trends in sales revenue, accounting for seasonality and other time-related effects.

# Load required libraries
library(forecast)
library(gridExtra)

# Convert Date column to Date format if not already
simulated_data$Date <- as.Date(simulated_data$Date)

# Treat the daily sales figures as a regularly spaced series.
# frequency = 7 declares a seasonal period of 7 observations (one week);
# the Date column itself is not passed to ts().
sales_ts <- ts(simulated_data$Sales_Revenue, frequency = 7)

# Fit ARIMA model (order selected automatically by auto.arima)
arima_model <- auto.arima(sales_ts)

# Actual vs. Fitted plot: observed sales and the model's in-sample fitted
# values drawn against the calendar date, so that the plot shows what its
# title and axis label say
fit_comparison <- data.frame(
  Date = simulated_data$Date,
  Actual = as.numeric(sales_ts),
  Fitted = as.numeric(fitted(arima_model))
)
actual_vs_fitted <- ggplot(fit_comparison, aes(x = Date)) +
  geom_line(aes(y = Actual, colour = "Actual")) +
  geom_line(aes(y = Fitted, colour = "Fitted")) +
  labs(
    title = "Actual vs Fitted",
    x = "Date",
    y = "Sales Revenue",
    colour = NULL
  )

# Residuals plot: ARIMA residuals in observation order with a zero line
arima_residuals <- data.frame(
  residuals = as.numeric(residuals(arima_model))
)
residuals_plot <- ggplot(
  arima_residuals,
  aes(x = seq_along(residuals), y = residuals)
) +
  geom_point(color = "#496989") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "#561C24") +
  labs(title = "Residuals", x = "Index", y = "Residuals")

# Combine plots
plot_grid(actual_vs_fitted, residuals_plot, nrow = 2)

# Autocorrelation of the sales series
acf(sales_ts)

Dilihat dari plotnya, datanya tidak bersifat musiman, tetapi bersifat siklus.

3

Evaluate and compare the performance of both models in forecasting future sales revenue.

# Evaluate both models on a hold-out window.
#
# The last `horizon` observations are withheld, both models are re-estimated
# on the remaining observations, and their predictions for the withheld
# window are scored against the actual values of that same window. This
# keeps predictions and actuals aligned one-to-one and makes the linear
# regression metrics come from the regression model itself.

# Forecast horizon; also the number of observations held out for scoring
horizon <- 30

# Forecast of the next `horizon` periods beyond the observed sample, from
# the ARIMA model fitted on the full series. No actual values exist for
# these periods, so this forecast is not used in the metrics below.
arima_forecast <- forecast(arima_model, h = horizon)

# Split the data into a training window and a hold-out window
n_obs <- nrow(simulated_data)
stopifnot(n_obs > horizon)
train_rows <- seq_len(n_obs - horizon)
train_data <- simulated_data[train_rows, ]
test_data <- simulated_data[-train_rows, ]
actual_sales <- test_data$Sales_Revenue

# ARIMA: re-select and re-estimate on the training window, then forecast
# the hold-out window
train_ts <- ts(train_data$Sales_Revenue, frequency = frequency(sales_ts))
arima_holdout <- forecast(auto.arima(train_ts), h = horizon)
arima_error <- as.numeric(arima_holdout$mean) - actual_sales

# Linear regression: same formula as `model`, re-estimated on the training
# window, then predicted for the hold-out rows
linear_reg_holdout <- lm(formula(model), data = train_data)
linear_reg_pred <- predict(linear_reg_holdout, newdata = test_data)
linear_reg_error <- as.numeric(linear_reg_pred) - actual_sales

# Calculate evaluation metrics for ARIMA model
arima_mse <- mean(arima_error^2)
arima_mae <- mean(abs(arima_error))
arima_mape <- mean(abs(arima_error / actual_sales)) * 100

# Calculate evaluation metrics for linear regression model
linear_reg_mse <- mean(linear_reg_error^2)
linear_reg_mae <- mean(abs(linear_reg_error))
linear_reg_mape <- mean(abs(linear_reg_error / actual_sales)) * 100

# Print evaluation metrics
# NOTE: the figures recorded from the earlier render (ARIMA: MSE 18802.16,
# MAE 109.4466, MAPE 11.29185 %; linear regression: MSE 0, MAE 218.977,
# MAPE 20.61591 %) came from comparing 30 values against all 100
# observations and from a last-value placeholder instead of the regression.
# They do not apply to this evaluation; re-run to obtain current values.
cat("Evaluation metrics for ARIMA model:\n")
cat("MSE:", arima_mse, "\n")
cat("MAE:", arima_mae, "\n")
cat("MAPE:", arima_mape, "%\n\n")
cat("Evaluation metrics for Linear Regression model:\n")
cat("MSE:", linear_reg_mse, "\n")
cat("MAE:", linear_reg_mae, "\n")
cat("MAPE:", linear_reg_mape, "%\n")

Note

- Forecast: Kode tersebut menghasilkan peramalan untuk kedua model. Untuk ARIMA, digunakan fungsi forecast().
- Menghitung Metrik Evaluasi:
  - Untuk model ARIMA: Mean Squared Error (MSE), Mean Absolute Error (MAE), dan Mean Absolute Percentage Error (MAPE).
  - Untuk model regresi linear: MSE, MAE, dan MAPE.
  Metrik-metrik ini dihitung dengan membandingkan nilai-nilai yang diramalkan dengan data simulasi aktual (simulated_data$Sales_Revenue).
- Mencetak Metrik Evaluasi: Akhirnya, kode tersebut mencetak metrik evaluasi untuk kedua model menggunakan fungsi cat().