# Load required libraries
# Load required libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
# Set seed for reproducibility
set.seed(123)
# Number of observations
n <- 100
# Simulate date range
start_date <- ymd("2022-01-01")
end_date <- ymd("2022-04-10")
dates <- seq(start_date, end_date, by = "day")
# Simulate predictor variables
promotional_spending <- runif(n, min = 1000, max = 5000)
price <- rnorm(n, mean = 50, sd = 10)
weather_conditions <- sample(c("sunny", "cloudy", "rainy"), size = n, replace = TRUE)
# Simulate sales revenue
sales_trend <- 0.1 * seq(1, n)
seasonal_pattern <- sin(seq(1, n) * 2 * pi / 365 * 7) * 100
sales_noise <- rnorm(n, mean = 0, sd = 100)
sales_revenue <- 1000 + sales_trend + seasonal_pattern + sales_noise
# Create dataframe
simulated_data <- tibble(
Date = dates,
Promotional_Spending = promotional_spending,
Price = price,
Weather_Conditions = weather_conditions,
Sales_Revenue = sales_revenue
)
# Display the first few rows of the dataset
head(simulated_data)
## # A tibble: 6 × 5
## Date Promotional_Spending Price Weather_Conditions Sales_Revenue
## <date> <dbl> <dbl> <chr> <dbl>
## 1 2022-01-01 2150. 52.5 cloudy 1049.
## 2 2022-01-02 4153. 49.7 cloudy 978.
## 3 2022-01-03 2636. 49.6 sunny 1068.
## 4 2022-01-04 4532. 63.7 cloudy 1018.
## 5 2022-01-05 4762. 47.7 sunny 1166.
## 6 2022-01-06 1182. 65.2 sunny 1083.
ggplot(simulated_data, aes(x = Date, y = Sales_Revenue)) +
geom_line(color = "red") +
labs(title = "Penjualan Harian",
x = "Tanggal",
y = "Pendapatan Penjualan") +
theme_minimal()
model <- lm(Sales_Revenue ~ Promotional_Spending + Price + Weather_Conditions, data = simulated_data)
summary(model)
##
## Call:
## lm(formula = Sales_Revenue ~ Promotional_Spending + Price + Weather_Conditions,
## data = simulated_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -340.29 -71.58 11.21 82.72 315.61
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 992.92772 81.16696 12.233 <2e-16 ***
## Promotional_Spending 0.01922 0.01197 1.605 0.112
## Price -0.64144 1.42687 -0.450 0.654
## Weather_Conditionsrainy -22.74556 36.17645 -0.629 0.531
## Weather_Conditionssunny -23.58060 32.57721 -0.724 0.471
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 134.7 on 95 degrees of freedom
## Multiple R-squared: 0.03842, Adjusted R-squared: -0.002062
## F-statistic: 0.9491 on 4 and 95 DF, p-value: 0.4393
new_promotional_spending <- c(6000, 7000, 8000)
new_price <- c(90, 95, 100)
new_weather_conditions <- c("sunny", "rainy", "cloudy")
new_data <- data.frame(Promotional_Spending = new_promotional_spending,
Price = new_price,
Weather_Conditions = new_weather_conditions)
predicted_sales <- predict(model, newdata = new_data)
predicted_sales <- predict(model, newdata = simulated_data)
predicted_data <- cbind(simulated_data, Predicted_Sales = predicted_sales)
ggplot(predicted_data, aes(x = Date)) +
geom_line(aes(y = Sales_Revenue, color = "Actual Sales")) +
geom_line(aes(y = Predicted_Sales, color = "Predicted Sales"), linetype = "dashed") +
labs(title = "Perbandingan Actual dan Prediksi",
x = "Tanggal",
y = "Sales Revenue") +
scale_color_manual(name = "Data", values = c("Actual Sales" = "blue", "Predicted Sales" = "red")) +
theme_minimal()
Dari plot diatas menunjukan tingkat akurasi yang rendah di model prediksinya, karena garis actual sales jauh dari predicted salesnya.
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(model)
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 7.7981, df = 4, p-value = 0.09926
nilai p yang sesuai adalah 0.09926. Karena nilai p lebih dari 0.05, kita gagal menolak hipotesis nol. Kita tidak memiliki bukti yang cukup untuk mengatakan bahwa heteroskedastisitas hadir dalam model regresi.
# Memuat library forecast untuk pemodelan ARIMA
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Membuat objek ts untuk analisis deret waktu
sales_ts <- ts(simulated_data$Sales_Revenue, frequency = 365)
# Create a dataframe from the ts object for easier use with plotly
ts_data <- data.frame(Time = 1:length(sales_ts), Sales_Revenue = as.numeric(sales_ts))
arima_model <- auto.arima(sales_ts)
summary(arima_model)
## Series: sales_ts
## ARIMA(1,0,1) with non-zero mean
##
## Coefficients:
## ar1 ma1 mean
## 0.9198 -0.7377 1001.994
## s.e. 0.0535 0.0803 36.507
##
## sigma^2 = 15050: log likelihood = -621.52
## AIC=1251.04 AICc=1251.47 BIC=1261.47
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -1.929319 120.8257 94.75599 -1.80019 9.983429 NaN -0.0317043
Output menunjukkan bahwa model ARIMA(1,0,1) berhasil menangkap sebagian besar informasi dalam data, dengan beberapa kesalahan yang tetap ada. Metrik evaluasi memberikan gambaran di mana model mungkin perlu diperbaiki atau dikalibrasi.
forecast_arima <- forecast(arima_model, h = 12)
plot_time_series <- plot_ly(x = ~time(sales_ts), y = ~as.numeric(sales_ts), type = 'scatter', mode = 'lines', name = 'Actual') %>%
add_trace(x = time(forecast_arima$mean), y = forecast_arima$mean, mode = 'lines', name = 'Predicted (ARIMA)') %>%
layout(title = 'Perbandingan Actal dengan Prediksi - ARIMA Model',
xaxis = list(title = 'Time'),
yaxis = list(title = 'Sales Revenue'))
plot_time_series
# Generate forecasts for both models
arima_forecast <- forecast(arima_model, h = 30) # Forecast for the next 30 periods
# Calculate evaluation metrics for ARIMA model
arima_mse <- mean((as.numeric(arima_forecast$mean) - simulated_data$Sales_Revenue)^2)
## Warning in as.numeric(arima_forecast$mean) - simulated_data$Sales_Revenue:
## longer object length is not a multiple of shorter object length
arima_mae <- mean(abs(as.numeric(arima_forecast$mean) - simulated_data$Sales_Revenue))
## Warning in as.numeric(arima_forecast$mean) - simulated_data$Sales_Revenue:
## longer object length is not a multiple of shorter object length
arima_mape <- mean(abs((as.numeric(arima_forecast$mean) - simulated_data$Sales_Revenue) / simulated_data$Sales_Revenue)) * 100
## Warning in as.numeric(arima_forecast$mean) - simulated_data$Sales_Revenue:
## longer object length is not a multiple of shorter object length
cat("MSE:", arima_mse, "\n")
## MSE: 18802.16
cat("MAE:", arima_mae, "\n")
## MAE: 109.4466
cat("MAPE:", arima_mape, "%\n")
## MAPE: 11.29185 %
linear_reg_mse <- mean((rep(simulated_data$Sales_Revenue[length(simulated_data$Sales_Revenue)], 30) - simulated_data$Sales_Revenue[length(simulated_data$Sales_Revenue)])^2)
linear_reg_mae <- mean(abs(rep(simulated_data$Sales_Revenue[length(simulated_data$Sales_Revenue)], 30) - simulated_data$Sales_Revenue))
## Warning in
## rep(simulated_data$Sales_Revenue[length(simulated_data$Sales_Revenue)], :
## longer object length is not a multiple of shorter object length
linear_reg_mape <- mean(abs((rep(simulated_data$Sales_Revenue[length(simulated_data$Sales_Revenue)], 30) - simulated_data$Sales_Revenue) / simulated_data$Sales_Revenue)) * 100
## Warning in
## rep(simulated_data$Sales_Revenue[length(simulated_data$Sales_Revenue)], :
## longer object length is not a multiple of shorter object length
cat("MSE:", linear_reg_mse, "\n")
## MSE: 0
cat("MAE:", linear_reg_mae, "\n")
## MAE: 218.977
cat("MAPE:", linear_reg_mape, "%\n")
## MAPE: 20.61591 %