library(tseries)
## Warning: package 'tseries' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
data <- read.csv("C:/Users/georg/Downloads/archive (2)/TSLA_stock_data.csv")
str(data)
## 'data.frame': 3592 obs. of 7 variables:
## $ Date : chr "2010-06-29" "2010-06-30" "2010-07-01" "2010-07-02" ...
## $ Open : num 1.27 1.72 1.67 1.53 1.33 ...
## $ High : num 1.67 2.03 1.73 1.54 1.33 ...
## $ Low : num 1.17 1.55 1.35 1.25 1.06 ...
## $ Close : num 1.59 1.59 1.46 1.28 1.07 ...
## $ Adj.Close: num 1.59 1.59 1.46 1.28 1.07 ...
## $ Volume : int 281494500 257806500 123282000 77097000 103003500 103825500 115671000 60759000 33037500 40201500 ...
head(data)
## Date Open High Low Close Adj.Close Volume
## 1 2010-06-29 1.266667 1.666667 1.169333 1.592667 1.592667 281494500
## 2 2010-06-30 1.719333 2.028000 1.553333 1.588667 1.588667 257806500
## 3 2010-07-01 1.666667 1.728000 1.351333 1.464000 1.464000 123282000
## 4 2010-07-02 1.533333 1.540000 1.247333 1.280000 1.280000 77097000
## 5 2010-07-06 1.333333 1.333333 1.055333 1.074000 1.074000 103003500
## 6 2010-07-07 1.093333 1.108667 0.998667 1.053333 1.053333 103825500
ts_data <- ts(data$Close, frequency = 252, start = c(2010, 1))
plot(ts_data, main = "Tesla Stock Price Time Series",
ylab = "Price (USD)", xlab = "Time",
col = "blue", lwd = 2)
grid()

plot(decompose(ts_data))

cat("\n=== UJI STASIONERITAS DATA ORIGINAL ===\n")
##
## === UJI STASIONERITAS DATA ORIGINAL ===
adf_original <- adf.test(ts_data)
print(adf_original)
##
## Augmented Dickey-Fuller Test
##
## data: ts_data
## Dickey-Fuller = -2.4972, Lag order = 15, p-value = 0.3678
## alternative hypothesis: stationary
if(adf_original$p.value < 0.05) {
cat("\nData STASIONER (p-value < 0.05)\n")
} else {
cat("\nData NON-STASIONER (p-value >= 0.05) - Perlu differencing\n")
}
##
## Data NON-STASIONER (p-value >= 0.05) - Perlu differencing
diff1 <- diff(ts_data, differences = 1)
plot(diff1, main = "First Difference of Tesla Stock Price",
ylab = "Differenced Price", xlab = "Time",
col = "darkgreen", lwd = 2)
grid()
abline(h = 0, col = "red", lty = 2)

cat("\n=== UJI STASIONERITAS SETELAH DIFFERENCING ===\n")
##
## === UJI STASIONERITAS SETELAH DIFFERENCING ===
adf_diff <- adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
print(adf_diff)
##
## Augmented Dickey-Fuller Test
##
## data: diff1
## Dickey-Fuller = -15.458, Lag order = 15, p-value = 0.01
## alternative hypothesis: stationary
if(adf_diff$p.value < 0.05) {
cat("\nData setelah differencing SUDAH STASIONER\n")
} else {
cat("\nData masih belum stasioner, pertimbangkan differencing ke-2\n")
}
##
## Data setelah differencing SUDAH STASIONER
par(mfrow = c(2, 2))
acf(ts_data, main = "ACF - Original Data", lag.max = 40)
pacf(ts_data, main = "PACF - Original Data", lag.max = 40)
acf(diff1, main = "ACF - Differenced Data", lag.max = 40)
pacf(diff1, main = "PACF - Differenced Data", lag.max = 40)

par(mfrow = c(1, 1))
cat("\n=== IDENTIFIKASI MODEL ARIMA ===\n")
##
## === IDENTIFIKASI MODEL ARIMA ===
cat("Berdasarkan ACF dan PACF:\n")
## Berdasarkan ACF dan PACF:
cat("- Jika ACF cuts off dan PACF tails off → MA(q)\n")
## - Jika ACF cuts off dan PACF tails off → MA(q)
cat("- Jika PACF cuts off dan ACF tails off → AR(p)\n")
## - Jika PACF cuts off dan ACF tails off → AR(p)
cat("- Jika keduanya tails off → ARMA(p,q)\n\n")
## - Jika keduanya tails off → ARMA(p,q)
auto_model <- auto.arima(ts_data, trace = TRUE, ic = "aic")
##
## Fitting models using approximations to speed things up...
##
## ARIMA(2,1,2)(1,0,1)[252] with drift : Inf
## ARIMA(0,1,0) with drift : 21522.17
## ARIMA(1,1,0)(1,0,0)[252] with drift : Inf
## ARIMA(0,1,1)(0,0,1)[252] with drift : Inf
## ARIMA(0,1,0) : 21520.91
## ARIMA(0,1,0)(1,0,0)[252] with drift : Inf
## ARIMA(0,1,0)(0,0,1)[252] with drift : 21523.61
## ARIMA(0,1,0)(1,0,1)[252] with drift : Inf
## ARIMA(1,1,0) with drift : 21521.27
## ARIMA(0,1,1) with drift : 21520.38
## ARIMA(0,1,1)(1,0,0)[252] with drift : Inf
## ARIMA(0,1,1)(1,0,1)[252] with drift : Inf
## ARIMA(1,1,1) with drift : 21521.21
## ARIMA(0,1,2) with drift : 21521.71
## ARIMA(1,1,2) with drift : Inf
## ARIMA(0,1,1) : 21519.16
## ARIMA(0,1,1)(1,0,0)[252] : Inf
## ARIMA(0,1,1)(0,0,1)[252] : Inf
## ARIMA(0,1,1)(1,0,1)[252] : Inf
## ARIMA(1,1,1) : 21519.96
## ARIMA(0,1,2) : 21520.47
## ARIMA(1,1,0) : 21520.05
## ARIMA(1,1,2) : Inf
##
## Now re-fitting the best model(s) without approximations...
##
## ARIMA(0,1,1) : 21522.31
##
## Best model: ARIMA(0,1,1)
cat("\n=== AUTO ARIMA SUGGESTION ===\n")
##
## === AUTO ARIMA SUGGESTION ===
print(summary(auto_model))
## Series: ts_data
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.0319
## s.e. 0.0164
##
## sigma^2 = 23.45: log likelihood = -10759.16
## AIC=21522.31 AICc=21522.32 BIC=21534.69
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.07137161 4.840737 2.122604 0.07936019 2.483574 0.06247216
## ACF1
## Training set -0.0006828965
cat("\n=== PERBANDINGAN MODEL ARIMA ===\n")
##
## === PERBANDINGAN MODEL ARIMA ===
model1 <- Arima(ts_data, order = c(1, 1, 1))
cat("\nModel 1 - ARIMA(1,1,1):\n")
##
## Model 1 - ARIMA(1,1,1):
cat("AIC:", AIC(model1), "\n")
## AIC: 21522.12
cat("BIC:", BIC(model1), "\n")
## BIC: 21540.68
model2 <- Arima(ts_data, order = c(0, 1, 1))
cat("\nModel 2 - ARIMA(0,1,1):\n")
##
## Model 2 - ARIMA(0,1,1):
cat("AIC:", AIC(model2), "\n")
## AIC: 21522.31
cat("BIC:", BIC(model2), "\n")
## BIC: 21534.69
model3 <- Arima(ts_data, order = c(1, 1, 0))
cat("\nModel 3 - ARIMA(1,1,0):\n")
##
## Model 3 - ARIMA(1,1,0):
cat("AIC:", AIC(model3), "\n")
## AIC: 21522.21
cat("BIC:", BIC(model3), "\n")
## BIC: 21534.58
model4 <- Arima(ts_data, order = c(2, 1, 2))
cat("\nModel 4 - ARIMA(2,1,2):\n")
##
## Model 4 - ARIMA(2,1,2):
cat("AIC:", AIC(model4), "\n")
## AIC: 21523.55
cat("BIC:", BIC(model4), "\n")
## BIC: 21554.48
models <- list(model1, model2, model3, model4)
aics <- sapply(models, AIC)
best_model <- models[[which.min(aics)]]
cat("\n=== MODEL TERBAIK ===\n")
##
## === MODEL TERBAIK ===
print(summary(best_model))
## Series: ts_data
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## -0.5840 0.5510
## s.e. 0.1545 0.1581
##
## sigma^2 = 23.44: log likelihood = -10758.06
## AIC=21522.12 AICc=21522.13 BIC=21540.68
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.07058155 4.839258 2.122083 0.07848693 2.483125 0.06245681
## ACF1
## Training set 0.0009906909
cat("\n=== DIAGNOSTIK RESIDUAL ===\n")
##
## === DIAGNOSTIK RESIDUAL ===
par(mfrow = c(2, 2))
plot(residuals(best_model), main = "Residuals Time Series",
ylab = "Residuals", col = "blue")
abline(h = 0, col = "red", lty = 2)
acf(residuals(best_model), main = "ACF of Residuals")
pacf(residuals(best_model), main = "PACF of Residuals")
hist(residuals(best_model), main = "Histogram of Residuals",
xlab = "Residuals", col = "lightblue", probability = TRUE)
lines(density(residuals(best_model)), col = "red", lwd = 2)

par(mfrow = c(1, 1))
qqnorm(residuals(best_model), main = "Normal Q-Q Plot")
qqline(residuals(best_model), col = "red", lwd = 2)

cat("\n=== UJI WHITE NOISE (LJUNG-BOX TEST) ===\n")
##
## === UJI WHITE NOISE (LJUNG-BOX TEST) ===
lb_test <- Box.test(residuals(best_model), lag = 20, type = "Ljung-Box")
print(lb_test)
##
## Box-Ljung test
##
## data: residuals(best_model)
## X-squared = 102.82, df = 20, p-value = 3.925e-13
if(lb_test$p.value > 0.05) {
cat("\nResidual adalah WHITE NOISE (p-value > 0.05) ✓\n")
cat("Model sudah adequate\n")
} else {
cat("\nResidual BUKAN white noise (p-value <= 0.05) ✗\n")
cat("Model perlu diperbaiki\n")
}
##
## Residual BUKAN white noise (p-value <= 0.05) ✗
## Model perlu diperbaiki
cat("\n=== UJI NORMALITAS (JARQUE-BERA TEST) ===\n")
##
## === UJI NORMALITAS (JARQUE-BERA TEST) ===
jb_test <- jarque.bera.test(residuals(best_model))
print(jb_test)
##
## Jarque Bera Test
##
## data: residuals(best_model)
## X-squared = 42380, df = 2, p-value < 2.2e-16
if(jb_test$p.value > 0.05) {
cat("\nResidual berdistribusi NORMAL (p-value > 0.05) ✓\n")
} else {
cat("\nResidual TIDAK berdistribusi normal (p-value <= 0.05)\n")
cat("Catatan: Untuk data finansial, sering terjadi heavy-tailed distribution\n")
}
##
## Residual TIDAK berdistribusi normal (p-value <= 0.05)
## Catatan: Untuk data finansial, sering terjadi heavy-tailed distribution
cat("\n=== FORECASTING ===\n")
##
## === FORECASTING ===
forecast_result <- forecast(best_model, h = 30)
plot(forecast_result, main = "Tesla Stock Price Forecast",
xlab = "Time", ylab = "Price (USD)",
col = "blue", lwd = 2)
grid()

cat("\n========================================\n")
##
## ========================================
cat("RINGKASAN ANALISIS\n")
## RINGKASAN ANALISIS
cat("========================================\n")
## ========================================
cat("Model Terbaik:", paste0("ARIMA(", best_model$arma[1], ",",
best_model$arma[6], ",", best_model$arma[2], ")\n"))
## Model Terbaik: ARIMA(1,1,1)
cat("AIC:", AIC(best_model), "\n")
## AIC: 21522.12
cat("BIC:", BIC(best_model), "\n")
## BIC: 21540.68
cat("\nUji Stasioneritas (ADF):\n")
##
## Uji Stasioneritas (ADF):
cat("- Original: p-value =", round(adf_original$p.value, 4), "\n")
## - Original: p-value = 0.3678
cat("- Differenced: p-value =", round(adf_diff$p.value, 4), "\n")
## - Differenced: p-value = 0.01
cat("\nUji White Noise (Ljung-Box):\n")
##
## Uji White Noise (Ljung-Box):
cat("- p-value =", round(lb_test$p.value, 4), "\n")
## - p-value = 0
cat("\nUji Normalitas (Jarque-Bera):\n")
##
## Uji Normalitas (Jarque-Bera):
cat("- p-value =", round(jb_test$p.value, 4), "\n")
## - p-value = 0
cat("========================================\n")
## ========================================