Najla Dhia Rusydi (164221043)

Rashiqa Dewi Nariswari (164221044)

Fradinka Amelia Edyputri (164221045)

Load Library

library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
library(starma)
## Warning: package 'starma' was built under R version 4.3.3
library(tseries)
## Warning: package 'tseries' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(MASS)
library(cursr)
## Warning: package 'cursr' was built under R version 4.3.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
library(tseries)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.3.3
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Load Data Time Series

library(readr)
data <- read_csv("D:/Bismillah Semester 6/Pemodelan Runtun Waktu/Tugas Kelompok 1/POP.csv", col_types = cols(
  date = col_date(format = "%Y-%m-%d"),
  value = col_double()
))

data <- data[, c("date", "value")]

# Ubah kolom date menjadi format Date
data$date <- as.Date(data$date)

# Urutkan data berdasarkan tanggal (jika belum terurut)
data <- data[order(data$date), ]
head(data)
## # A tibble: 6 × 2
##   date        value
##   <date>      <dbl>
## 1 1952-01-01 156309
## 2 1952-02-01 156527
## 3 1952-03-01 156731
## 4 1952-04-01 156943
## 5 1952-05-01 157140
## 6 1952-06-01 157343
length(unique(data$date))
## [1] 816
# Ubah data menjadi time series (dengan asumsi data bulanan)
tahun_awal <- year(min(data$date))  # Tahun pertama dalam dataset
bulan_awal <- month(min(data$date))  # Bulan pertama dalam dataset
dataset <- ts(data$value, start=c(tahun_awal, bulan_awal), frequency=12)
# Tampilkan informasi time series
print("Time Series Object:")
## [1] "Time Series Object:"
print(dataset)
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1952 156309.0 156527.0 156731.0 156943.0 157140.0 157343.0 157553.0 157798.0
## 1953 158973.0 159170.0 159349.0 159556.0 159745.0 159956.0 160184.0 160449.0
## 1954 161690.0 161912.0 162124.0 162350.0 162564.0 162790.0 163026.0 163290.0
## 1955 164588.0 164809.0 165018.0 165251.0 165463.0 165695.0 165931.0 166192.0
## 1956 167513.0 167746.0 167977.0 168221.0 168436.0 168659.0 168903.0 169191.0
## 1957 170571.0 170806.0 171029.0 171271.0 171501.0 171741.0 171984.0 172257.0
## 1958 173533.0 173746.0 173945.0 174176.0 174397.0 174639.0 174882.0 175143.0
## 1959 176447.0 176685.0 176905.0 177146.0 177365.0 177591.0 177830.0 178101.0
## 1960 179386.0 179597.0 179788.0 180007.0 180222.0 180444.0 180671.0 180945.0
## 1961 182287.0 182520.0 182742.0 182992.0 183217.0 183452.0 183691.0 183958.0
## 1962 185242.0 185452.0 185650.0 185874.0 186087.0 186314.0 186538.0 186790.0
## 1963 188013.0 188213.0 188387.0 188580.0 188790.0 189018.0 189242.0 189496.0
## 1964 190668.0 190858.0 191047.0 191245.0 191447.0 191666.0 191889.0 192131.0
## 1965 193223.0 193393.0 193540.0 193709.0 193888.0 194087.0 194303.0 194528.0
## 1966 195539.0 195688.0 195831.0 195999.0 196178.0 196372.0 196560.0 196762.0
## 1967 197736.0 197892.0 198037.0 198206.0 198363.0 198537.0 198712.0 198911.0
## 1968 199808.0 199920.0 200056.0 200208.0 200361.0 200536.0 200706.0 200898.0
## 1969 201760.0 201881.0 202023.0 202161.0 202331.0 202507.0 202677.0 202877.0
## 1970 203849.0 204008.0 204156.0 204401.0 204607.0 204830.0 205052.0 205295.0
## 1971 206466.0 206668.0 206855.0 207065.0 207260.0 207462.0 207661.0 207881.0
## 1972 208917.0 209061.0 209212.0 209386.0 209545.0 209725.0 209896.0 210075.0
## 1973 210985.0 211120.0 211254.0 211420.0 211577.0 211746.0 211909.0 212092.0
## 1974 212932.0 213074.0 213211.0 213361.0 213513.0 213686.0 213854.0 214042.0
## 1975 214931.0 215065.0 215198.0 215353.0 215523.0 215768.0 215973.0 216195.0
## 1976 217095.0 217249.0 217381.0 217528.0 217685.0 217861.0 218035.0 218233.0
## 1977 219179.0 219344.0 219504.0 219684.0 219859.0 220046.0 220239.0 220458.0
## 1978 221477.0 221629.0 221792.0 221991.0 222176.0 222379.0 222585.0 222805.0
## 1979 223865.0 224053.0 224235.0 224438.0 224632.0 224843.0 225055.0 225295.0
## 1980 226451.0 226656.0 226849.0 227061.0 227251.0 227522.0 227726.0 227953.0
## 1981 228937.0 229071.0 229224.0 229403.0 229575.0 229761.0 229966.0 230187.0
## 1982 231157.0 231313.0 231470.0 231645.0 231809.0 231992.0 232188.0 232392.0
## 1983 233322.0 233473.0 233613.0 233781.0 233922.0 234118.0 234307.0 234501.0
## 1984 235385.0 235527.0 235675.0 235839.0 235993.0 236160.0 236348.0 236549.0
## 1985 237468.0 237602.0 237732.0 237900.0 238074.0 238270.0 238466.0 238679.0
## 1986 239638.0 239788.0 239928.0 240094.0 240271.0 240459.0 240651.0 240854.0
## 1987 241784.0 241930.0 242079.0 242252.0 242423.0 242608.0 242804.0 243012.0
## 1988 243981.0 244131.0 244279.0 244445.0 244610.0 244806.0 245021.0 245240.0
## 1989 246224.0 246378.0 246530.0 246721.0 246906.0 247114.0 247342.0 247573.0
## 1990 248659.0 248827.0 249012.0 249306.0 249565.0 249849.0 250132.0 250439.0
## 1991 251889.0 252135.0 252372.0 252643.0 252913.0 253207.0 253493.0 253807.0
## 1992 255214.0 255448.0 255703.0 255992.0 256285.0 256589.0 256894.0 257232.0
## 1993 258679.0 258919.0 259152.0 259414.0 259680.0 259963.0 260255.0 260566.0
## 1994 261919.0 262123.0 262352.0 262631.0 262877.0 263152.0 263436.0 263724.0
## 1995 265044.0 265270.0 265495.0 265755.0 265998.0 266270.0 266557.0 266843.0
## 1996 268151.0 268364.0 268595.0 268853.0 269108.0 269386.0 269667.0 269976.0
## 1997 271360.0 271585.0 271821.0 272083.0 272342.0 272622.0 272912.0 273237.0
## 1998 274626.0 274838.0 275047.0 275304.0 275564.0 275836.0 276115.0 276418.0
## 1999 277790.0 277992.0 278198.0 278451.0 278717.0 279001.0 279295.0 279602.0
## 2000 280976.0 281190.0 281409.0 281653.0 281877.0 282126.0 282385.0 282653.0
## 2001 283920.0 284137.0 284350.0 284581.0 284810.0 285062.0 285309.0 285570.0
## 2002 286788.0 286994.0 287190.0 287397.0 287623.0 287864.0 288105.0 288360.0
## 2003 289518.0 289714.0 289911.0 290125.0 290346.0 290584.0 290820.0 291072.0
## 2004 292192.0 292368.0 292561.0 292779.0 292997.0 293223.0 293463.0 293719.0
## 2005 294914.0 295105.0 295287.0 295490.0 295704.0 295936.0 296186.0 296440.0
## 2006 297647.0 297854.0 298060.0 298281.0 298496.0 298739.0 298996.0 299263.0
## 2007 300574.0 300802.0 301021.0 301254.0 301483.0 301739.0 302004.0 302267.0
## 2008 303506.0 303711.0 303907.0 304117.0 304323.0 304556.0 304798.0 305045.0
## 2009 306208.0 306402.0 306588.0 306787.0 306984.0 307206.0 307439.0 307685.0
## 2010 308833.0 309027.0 309212.0 309191.2 309369.1 309548.5 309745.7 309957.8
## 2011 310960.7 311113.4 311265.4 311436.2 311607.1 311791.2 311997.0 312205.4
## 2012 313183.2 313339.0 313499.4 313667.1 313830.5 314017.6 314210.8 314422.3
## 2013 315389.6 315520.1 315662.2 315817.9 315983.7 316171.0 316358.8 316580.3
## 2014 317593.9 317753.9 317917.2 318089.2 318269.5 318464.2 318662.4 318893.8
## 2015 319928.6 320074.5 320230.8 320402.3 320584.0 320773.6 320978.2 321202.5
## 2016 322232.9 322398.1 322551.5 322721.2 322900.0 323088.5 323291.0 323501.4
## 2017 324438.2 324581.5 324714.0 324861.8 325019.2 325186.2 325367.6 325567.7
## 2018 326454.1 326600.8 326736.7 326887.9 327048.7 327219.1 327403.9 327600.2
## 2019 328467.8 328610.7 328742.8 328890.2 329047.3 329214.0 329395.0 329591.3
##           Sep      Oct      Nov      Dec
## 1952 158053.0 158306.0 158451.0 158757.0
## 1953 160718.0 160978.0 161223.0 161453.0
## 1954 163570.0 163847.0 164107.0 164349.0
## 1955 166473.0 166755.0 167023.0 167270.0
## 1956 169488.0 169780.0 170063.0 170315.0
## 1957 172538.0 172816.0 173070.0 173298.0
## 1958 175413.0 175697.0 175966.0 176207.0
## 1959 178376.0 178657.0 178921.0 179153.0
## 1960 181238.0 181528.0 181796.0 182042.0
## 1961 184243.0 184524.0 184783.0 185016.0
## 1962 187058.0 187323.0 187574.0 187796.0
## 1963 189761.0 190028.0 190265.0 190472.0
## 1964 192376.0 192631.0 192847.0 193039.0
## 1965 194761.0 194997.0 195195.0 195372.0
## 1966 196984.0 197207.0 197398.0 197572.0
## 1967 199113.0 199311.0 199498.0 199657.0
## 1968 201095.0 201290.0 201466.0 201621.0
## 1969 203090.0 203302.0 203500.0 203675.0
## 1970 205540.0 205788.0 206024.0 206238.0
## 1971 208114.0 208345.0 208555.0 208740.0
## 1972 210278.0 210479.0 210656.0 210821.0
## 1973 212289.0 212475.0 212634.0 212785.0
## 1974 214246.0 214451.0 214625.0 214782.0
## 1975 216393.0 216587.0 216771.0 216931.0
## 1976 218440.0 218644.0 218834.0 219006.0
## 1977 220688.0 220904.0 221109.0 221303.0
## 1978 223053.0 223271.0 223477.0 223670.0
## 1979 225547.0 225801.0 226027.0 226243.0
## 1980 228186.0 228417.0 228612.0 228779.0
## 1981 230412.0 230641.0 230822.0 230989.0
## 1982 232599.0 232816.0 232993.0 233160.0
## 1983 234701.0 234907.0 235078.0 235235.0
## 1984 236760.0 236976.0 237159.0 237316.0
## 1985 238898.0 239113.0 239307.0 239477.0
## 1986 241068.0 241274.0 241467.0 241620.0
## 1987 243223.0 243446.0 243639.0 243809.0
## 1988 245464.0 245693.0 245884.0 246056.0
## 1989 247816.0 248067.0 248281.0 248479.0
## 1990 250751.0 251057.0 251346.0 251626.0
## 1991 254126.0 254435.0 254718.0 254964.0
## 1992 257548.0 257861.0 258147.0 258413.0
## 1993 260867.0 261163.0 261425.0 261674.0
## 1994 264017.0 264301.0 264559.0 264804.0
## 1995 267152.0 267456.0 267715.0 267943.0
## 1996 270284.0 270581.0 270878.0 271125.0
## 1997 273553.0 273852.0 274126.0 274372.0
## 1998 276714.0 277003.0 277277.0 277526.0
## 1999 279903.0 280203.0 280471.0 280716.0
## 2000 282932.0 283201.0 283453.0 283696.0
## 2001 285843.0 286098.0 286341.0 286570.0
## 2002 288618.0 288870.0 289106.0 289313.0
## 2003 291321.0 291574.0 291807.0 292008.0
## 2004 293971.0 294230.0 294466.0 294694.0
## 2005 296707.0 296972.0 297207.0 297431.0
## 2006 299554.0 299835.0 300094.0 300340.0
## 2007 302546.0 302807.0 303054.0 303287.0
## 2008 305309.0 305554.0 305786.0 306004.0
## 2009 307946.0 308189.0 308418.0 308633.0
## 2010 310176.5 310400.0 310595.8 310781.7
## 2011 312429.1 312644.2 312829.5 313009.7
## 2012 314646.7 314854.0 315053.9 315232.8
## 2013 316806.1 317022.3 317228.0 317411.6
## 2014 319125.3 319353.7 319564.2 319746.2
## 2015 321427.6 321653.0 321856.3 322043.1
## 2016 323709.9 323919.7 324106.5 324274.9
## 2017 325766.0 325966.0 326142.6 326301.4
## 2018 327794.8 327991.0 328163.9 328318.9
## 2019 329785.9 329982.0 330154.9 330309.9
print(paste("Start:", paste(start(dataset), collapse = "-")))
## [1] "Start: 1952-1"
print(paste("End:", paste(end(dataset), collapse = "-")))
## [1] "End: 2019-12"
print(paste("Frequency:", frequency(dataset)))
## [1] "Frequency: 12"
print("STATISTICAL SUMMARY")
## [1] "STATISTICAL SUMMARY"
cat("Basic Statistics:\n")
## Basic Statistics:
cat("Mean:", mean(dataset), "\n")
## Mean: 243847.8
cat("Median:", median(dataset), "\n")
## Median: 239557.5
cat("Standard Deviation:", sd(dataset), "\n")
## Standard Deviation: 50519.14
cat("Variance:", var(dataset), "\n")
## Variance: 2552183564
cat("Min:", min(dataset), "\n")
## Min: 156309
cat("Max:", max(dataset), "\n")
## Max: 330309.9
cat("Range:", max(dataset) - min(dataset), "\n")
## Range: 174000.9

Split Data

# Hitung jumlah total observasi
n <- length(dataset)

# Hitung index split
split_index <- floor(0.9 * n)

# Split data
ts_data <- window(dataset, end = time(dataset)[split_index])
ts_test <- window(dataset, start = time(dataset)[split_index + 1])

df_train <- data[1:split_index, ]       # 90% pertama untuk training
df_test <- data[(split_index + 1):n, ]  # 10% terakhir untuk testing
head(ts_data)
##         Jan    Feb    Mar    Apr    May    Jun
## 1952 156309 156527 156731 156943 157140 157343

Identifikasi Model

Plot

ggplot(data, aes(x = date, y = value)) +
  geom_line(color = "steelblue", size = 1) +
  geom_point(color = "darkblue", size = 1.5) +
  labs(title = "Perkembangan Populasi Manusia per Bulan",
       subtitle = paste("Periode:", format(min(data$date), "%Y"), "s.d.", format(max(data$date), "%Y")),
       x = "Tahun", y = "Jumlah Populasi") +
  scale_x_date(date_breaks = "10 year", date_labels = "%Y") +  # hanya label tahunan
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 0, vjust = 0.5)  # opsional: bisa angle = 45 kalau masih padat
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Hitung rolling mean dan rolling variance selama 12 bulan
rolling_mean <- rollmean(data$value, k = 12, fill = NA)
rolling_var <- rollapply(data$value, width = 12, FUN = var, fill = NA)

# Gabungkan ke data.frame
df_rolling <- data.frame(
  date = data$date,
  Population = data$value,
  RollingMean = rolling_mean,
  RollingVariance = rolling_var
)

# Plot Rolling Mean dan Rolling Variance
p1 <- ggplot(df_rolling, aes(x = date)) +
  geom_line(aes(y = Population), color = "#3498DB", alpha = 0.6) +
  geom_line(aes(y = RollingMean), color = "#E74C3C", size = 1) +
  labs(title = "Populasi dan Rolling Mean (12 bulan)", y = "Populasi") +
  theme_minimal()

p2 <- ggplot(df_rolling, aes(x = date, y = RollingVariance)) +
  geom_line(color = "#2ECC71", size = 1) +
  labs(title = "Rolling Variance (12 bulan)", y = "Variansi") +
  theme_minimal()

# Tampilkan kedua plot
grid.arrange(p1, p2, ncol = 1)
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Removed 11 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Hitung rolling mean dan rolling standard deviation (12 bulan)
rolling_mean <- rollmean(ts_data, k = 12, align = "right", fill = NA)
rolling_sd <- rollapply(ts_data, width = 12, FUN = sd, align = "right", fill = NA)

# Plot time series
plot.ts(ts_data, col = "darkblue", lwd = 2, main = "Time Series Populasi Bulanan",
        ylab = "Populasi", xlab = "Waktu")

# Tambahkan garis rolling mean
lines(rolling_mean, col = "green", lwd = 2, lty = 2)

# Tambahkan garis batas ±1 standar deviasi
lines(rolling_mean + rolling_sd, col = "red", lty = 3)
lines(rolling_mean - rolling_sd, col = "red", lty = 3)

# Tambahkan legenda
legend("topleft", legend = c("Populasi", "Rolling Mean", "Mean ± SD"),
       col = c("darkblue", "green", "red"), lty = c(1, 2, 3), lwd = 2)

# Decompose time series
decomposed <- decompose(ts_data)

# Plot hasil dekomposisi
plot(decomposed)

library(forecast)
ggseasonplot(ts_data, year.labels = TRUE, main = "Seasonal Plot", col = rainbow(12))

Uji Stasioneritas

# Uji stasioner dalam varians

library(FinTS)
## Warning: package 'FinTS' was built under R version 4.3.3
## 
## Attaching package: 'FinTS'
## The following object is masked from 'package:forecast':
## 
##     Acf
# Uji ARCH (Autoregressive Conditional Heteroskedasticity)
arch_result <- ArchTest(ts_data, lags=12)
print(arch_result)
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  ts_data
## Chi-squared = 722, df = 12, p-value < 2.2e-16
library(forecast)
library(FinTS)
library(ggplot2)

# Transformasi log
ts_transformed <- log(ts_data)

Uji stasioneritas dalam mean

diff_ts1 <- diff(ts_transformed, differences = 1) 
plot(diff_ts1, main = "Differenced 1 Time Series")

adf.test(diff_ts1)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_ts1
## Dickey-Fuller = -1.6196, Lag order = 9, p-value = 0.7393
## alternative hypothesis: stationary
diff_ts <- diff(diff_ts1, differences = 1)
plot(diff_ts, main = "Differenced 2 Time Series")

adf.test(diff_ts)
## Warning in adf.test(diff_ts): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_ts
## Dickey-Fuller = -21.171, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary

Identifikasi Orde

ACF

acf(diff_ts, main = "ACF - Autocorrelation")

# Hitung ACF
acf_result <- acf(diff_ts, plot = FALSE)

# Ambil lag dan nilai ACF
acf_values <- data.frame(
  Lag = acf_result$lag[, 1, 1] * frequency(diff_ts),  # buat jadi bulat
  ACF = acf_result$acf[, 1, 1]
)

# Hitung batas signifikansi
n <- length(ts_data)
significance_limit <- 1.96 / sqrt(n)

# Tandai yang signifikan
acf_values$Significant <- abs(acf_values$ACF) > significance_limit

print(acf_values)
##    Lag          ACF Significant
## 1    0  1.000000000        TRUE
## 2    1 -0.158098505        TRUE
## 3    2  0.141565329        TRUE
## 4    3 -0.052934392       FALSE
## 5    4 -0.085331112        TRUE
## 6    5 -0.231559562        TRUE
## 7    6 -0.194919402        TRUE
## 8    7 -0.222923918        TRUE
## 9    8 -0.083334407        TRUE
## 10   9 -0.020320864       FALSE
## 11  10  0.097608658        TRUE
## 12  11  0.112056883        TRUE
## 13  12  0.453632635        TRUE
## 14  13  0.116675019        TRUE
## 15  14  0.115447094        TRUE
## 16  15 -0.050927942       FALSE
## 17  16 -0.114175393        TRUE
## 18  17 -0.197622484        TRUE
## 19  18 -0.210712401        TRUE
## 20  19 -0.198396227        TRUE
## 21  20 -0.108881322        TRUE
## 22  21 -0.007350819       FALSE
## 23  22  0.098948464        TRUE
## 24  23  0.124139311        TRUE
## 25  24  0.414110928        TRUE
## 26  25  0.133569735        TRUE
## 27  26  0.112502683        TRUE
## 28  27 -0.047459730       FALSE
## 29  28 -0.115038742        TRUE

PACF

pacf(diff_ts, main = "PACF - Partial Autocorrelation", lag.max = 12)

# Hitung PACF
pacf_result <- pacf(diff_ts, plot = FALSE)

# Ambil lag dan nilai ACF
pacf_values <- data.frame(
  Lag = pacf_result$lag[, 1, 1] * frequency(diff_ts),  # buat jadi bulat
  PACF = pacf_result$acf[, 1, 1]
)

# Hitung batas signifikansi (sama seperti ACF)
n <- length(ts_data)
significance_limit <- 1.96 / sqrt(n)

# Tandai yang signifikan
pacf_values$Significant <- abs(pacf_values$PACF) > significance_limit

# Tampilkan hasil
print(pacf_values)
##    Lag         PACF Significant
## 1    1 -0.158098505        TRUE
## 2    2  0.119558575        TRUE
## 3    3 -0.014907339       FALSE
## 4    4 -0.116139843        TRUE
## 5    5 -0.264252911        TRUE
## 6    6 -0.276379069        TRUE
## 7    7 -0.315708943        TRUE
## 8    8 -0.263228109        TRUE
## 9    9 -0.239496454        TRUE
## 10  10 -0.207568475        TRUE
## 11  11 -0.253070757        TRUE
## 12  12  0.200005751        TRUE
## 13  13  0.210095408        TRUE
## 14  14  0.170679071        TRUE
## 15  15  0.094528789        TRUE
## 16  16  0.030353141       FALSE
## 17  17  0.025255383       FALSE
## 18  18  0.005366235       FALSE
## 19  19 -0.010518863       FALSE
## 20  20 -0.056586052       FALSE
## 21  21 -0.078068954        TRUE
## 22  22 -0.078181520        TRUE
## 23  23 -0.163523966        TRUE
## 24  24  0.076307496        TRUE
## 25  25  0.085082051        TRUE
## 26  26  0.066276600       FALSE
## 27  27  0.036957802       FALSE
## 28  28  0.018707283       FALSE

Simulasi Beberapa Model Manual

library(forecast)
library(Metrics)
## Warning: package 'Metrics' was built under R version 4.3.3
## 
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
## 
##     accuracy
# Parameter musiman tetap
D <- 1
P <- 1
Q <- 1
s <- 12

# Siapkan penyimpanan hasil
results <- data.frame()

# Grid parameter
d_values <- 1:2
p_values <- 0:2
q_values <- 1:2

# Loop kombinasi d, p, q
for (d in d_values) {
  for (p in p_values) {
    for (q in q_values) {
      model_label <- paste0("SARIMA(", p, ",", d, ",", q, ")(", P, ",", D, ",", Q, ")[", s, "]")
      
      # Try-catch untuk menghindari error
      model <- tryCatch({
        Arima(ts_transformed, order = c(p, d, q),
              seasonal = c(P, D, Q),
              include.drift = TRUE)
      }, error = function(e) NULL)
      
      # Jika model berhasil dibentuk
      if (!is.null(model)) {
        fc <- forecast(model, h = length(ts_test))
        pred <- exp(fc$mean)  # back-transform dari log

        # Evaluasi
        mse_val <- mean((ts_test - pred)^2)
        rmse_val <- sqrt(mse_val)
        mape_val <- mape(ts_test, pred) * 100

        results <- rbind(results, data.frame(
          Model = model_label,
          d = d,
          p = p,
          q = q,
          AIC = AIC(model),
          BIC = BIC(model),
          MSE = mse_val,
          RMSE = rmse_val,
          MAPE = mape_val
        ))
      }
    }
  }
}
## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.
## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.

## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.

## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.

## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.

## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.

## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.

## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.

## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.

## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.

## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.

## Warning in Arima(ts_transformed, order = c(p, d, q), seasonal = c(P, D, : No
## drift term fitted as the order of difference is 2 or more.
# Urutkan hasil berdasarkan RMSE terkecil
results <- results[order(results$RMSE), ]

# Tampilkan hasil evaluasi semua model
print(results)
##                       Model d p q        AIC        BIC          MSE
## 1  SARIMA(0,1,1)(1,1,1)[12] 1 0 1 -11360.707 -11342.384 7.201621e+04
## 2  SARIMA(0,1,2)(1,1,1)[12] 1 0 2 -11465.278 -11442.375 8.230630e+04
## 5  SARIMA(2,1,1)(1,1,1)[12] 1 2 1 -11726.646 -11699.162 1.214462e+05
## 6  SARIMA(2,1,2)(1,1,1)[12] 1 2 2 -11718.135 -11686.071 1.235106e+05
## 3  SARIMA(1,1,1)(1,1,1)[12] 1 1 1 -11721.766 -11698.863 1.251153e+05
## 4  SARIMA(1,1,2)(1,1,1)[12] 1 1 2 -11719.766 -11692.282 1.251676e+05
## 8  SARIMA(0,2,2)(1,1,1)[12] 2 0 2  -7356.061  -7333.165 1.150220e+07
## 7  SARIMA(0,2,1)(1,1,1)[12] 2 0 1  -7356.300  -7337.983 1.591893e+07
## 9  SARIMA(1,2,1)(1,1,1)[12] 2 1 1  -7412.878  -7389.982 9.003120e+08
## 11 SARIMA(2,2,1)(1,1,1)[12] 2 2 1  -7538.585  -7511.110 9.340700e+08
## 10 SARIMA(1,2,2)(1,1,1)[12] 2 1 2  -7420.163  -7392.687 7.760726e+10
## 12 SARIMA(2,2,2)(1,1,1)[12] 2 2 2  -7992.634  -7960.579 1.037407e+14
##            RMSE         MAPE
## 1  2.683584e+02 5.766773e-02
## 2  2.868907e+02 6.007417e-02
## 5  3.484913e+02 9.329759e-02
## 6  3.514408e+02 9.425836e-02
## 3  3.537165e+02 9.507028e-02
## 4  3.537904e+02 9.509645e-02
## 8  3.391489e+03 8.289902e-01
## 7  3.989853e+03 9.687489e-01
## 9  3.000520e+04 6.857936e+00
## 11 3.056256e+04 6.907289e+00
## 10 2.785808e+05 5.374938e+01
## 12 1.018532e+07 1.471171e+03

Simulasi Model dengan auto ARIMA

model_auto <- auto.arima(ts_transformed, seasonal = TRUE, stepwise = FALSE, approximation = FALSE, trace = TRUE)
## 
##  ARIMA(0,1,0)(0,1,0)[12]                    : -11191.82
##  ARIMA(0,1,0)(0,1,1)[12]                    : -11197.37
##  ARIMA(0,1,0)(0,1,2)[12]                    : -11195.39
##  ARIMA(0,1,0)(1,1,0)[12]                    : -11197.49
##  ARIMA(0,1,0)(1,1,1)[12]                    : -11195.59
##  ARIMA(0,1,0)(1,1,2)[12]                    : -11193.58
##  ARIMA(0,1,0)(2,1,0)[12]                    : -11195.53
##  ARIMA(0,1,0)(2,1,1)[12]                    : -11193.58
##  ARIMA(0,1,0)(2,1,2)[12]                    : -11192.56
##  ARIMA(0,1,1)(0,1,0)[12]                    : -11336.15
##  ARIMA(0,1,1)(0,1,1)[12]                    : -11361.73
##  ARIMA(0,1,1)(0,1,2)[12]                    : -11360.38
##  ARIMA(0,1,1)(1,1,0)[12]                    : -11359.3
##  ARIMA(0,1,1)(1,1,1)[12]                    : -11360.65
##  ARIMA(0,1,1)(1,1,2)[12]                    : -11358.78
##  ARIMA(0,1,1)(2,1,0)[12]                    : -11359.31
##  ARIMA(0,1,1)(2,1,1)[12]                    : -11358.89
##  ARIMA(0,1,1)(2,1,2)[12]                    : -11356.9
##  ARIMA(0,1,2)(0,1,0)[12]                    : -11418.9
##  ARIMA(0,1,2)(0,1,1)[12]                    : -11465.68
##  ARIMA(0,1,2)(0,1,2)[12]                    : -11465.15
##  ARIMA(0,1,2)(1,1,0)[12]                    : -11457.9
##  ARIMA(0,1,2)(1,1,1)[12]                    : -11465.19
##  ARIMA(0,1,2)(1,1,2)[12]                    : Inf
##  ARIMA(0,1,2)(2,1,0)[12]                    : -11463.25
##  ARIMA(0,1,2)(2,1,1)[12]                    : -11463.16
##  ARIMA(0,1,3)(0,1,0)[12]                    : -11455.99
##  ARIMA(0,1,3)(0,1,1)[12]                    : -11516.86
##  ARIMA(0,1,3)(0,1,2)[12]                    : -11517.22
##  ARIMA(0,1,3)(1,1,0)[12]                    : -11503.81
##  ARIMA(0,1,3)(1,1,1)[12]                    : -11517.04
##  ARIMA(0,1,3)(2,1,0)[12]                    : -11514.76
##  ARIMA(0,1,4)(0,1,0)[12]                    : -11478.83
##  ARIMA(0,1,4)(0,1,1)[12]                    : -11555.97
##  ARIMA(0,1,4)(1,1,0)[12]                    : -11538.29
##  ARIMA(0,1,5)(0,1,0)[12]                    : -11492.37
##  ARIMA(1,1,0)(0,1,0)[12]                    : -11442.92
##  ARIMA(1,1,0)(0,1,1)[12]                    : -11525.88
##  ARIMA(1,1,0)(0,1,2)[12]                    : -11526.57
##  ARIMA(1,1,0)(1,1,0)[12]                    : -11502.61
##  ARIMA(1,1,0)(1,1,1)[12]                    : -11526.27
##  ARIMA(1,1,0)(1,1,2)[12]                    : -11528.62
##  ARIMA(1,1,0)(2,1,0)[12]                    : -11521.76
##  ARIMA(1,1,0)(2,1,1)[12]                    : -11524.8
##  ARIMA(1,1,0)(2,1,2)[12]                    : -11526.59
##  ARIMA(1,1,1)(0,1,0)[12]                    : -11518.13
##  ARIMA(1,1,1)(0,1,1)[12]                    : -11723.71
##  ARIMA(1,1,1)(0,1,2)[12]                    : -11721.68
##  ARIMA(1,1,1)(1,1,0)[12]                    : -11630.24
##  ARIMA(1,1,1)(1,1,1)[12]                    : -11721.68
##  ARIMA(1,1,1)(1,1,2)[12]                    : Inf
##  ARIMA(1,1,1)(2,1,0)[12]                    : -11680.88
##  ARIMA(1,1,1)(2,1,1)[12]                    : -11720.9
##  ARIMA(1,1,2)(0,1,0)[12]                    : -11518.76
##  ARIMA(1,1,2)(0,1,1)[12]                    : -11721.68
##  ARIMA(1,1,2)(0,1,2)[12]                    : -11719.65
##  ARIMA(1,1,2)(1,1,0)[12]                    : -11630.05
##  ARIMA(1,1,2)(1,1,1)[12]                    : -11719.65
##  ARIMA(1,1,2)(2,1,0)[12]                    : -11680.04
##  ARIMA(1,1,3)(0,1,0)[12]                    : -11516.78
##  ARIMA(1,1,3)(0,1,1)[12]                    : -11721.82
##  ARIMA(1,1,3)(1,1,0)[12]                    : -11628.09
##  ARIMA(1,1,4)(0,1,0)[12]                    : -11515.12
##  ARIMA(2,1,0)(0,1,0)[12]                    : -11510.18
##  ARIMA(2,1,0)(0,1,1)[12]                    : -11633.18
##  ARIMA(2,1,0)(0,1,2)[12]                    : -11632.38
##  ARIMA(2,1,0)(1,1,0)[12]                    : -11595.11
##  ARIMA(2,1,0)(1,1,1)[12]                    : -11632.26
##  ARIMA(2,1,0)(1,1,2)[12]                    : -11633.52
##  ARIMA(2,1,0)(2,1,0)[12]                    : -11624.28
##  ARIMA(2,1,0)(2,1,1)[12]                    : -11630.81
##  ARIMA(2,1,1)(0,1,0)[12]                    : -11518.83
##  ARIMA(2,1,1)(0,1,1)[12]                    : -11721.68
##  ARIMA(2,1,1)(0,1,2)[12]                    : -11719.65
##  ARIMA(2,1,1)(1,1,0)[12]                    : -11630.11
##  ARIMA(2,1,1)(1,1,1)[12]                    : Inf
##  ARIMA(2,1,1)(2,1,0)[12]                    : Inf
##  ARIMA(2,1,2)(0,1,0)[12]                    : -11516.8
##  ARIMA(2,1,2)(0,1,1)[12]                    : -11719.91
##  ARIMA(2,1,2)(1,1,0)[12]                    : Inf
##  ARIMA(2,1,3)(0,1,0)[12]                    : -11514.8
##  ARIMA(3,1,0)(0,1,0)[12]                    : -11516.66
##  ARIMA(3,1,0)(0,1,1)[12]                    : -11665.41
##  ARIMA(3,1,0)(0,1,2)[12]                    : -11663.46
##  ARIMA(3,1,0)(1,1,0)[12]                    : -11614.81
##  ARIMA(3,1,0)(1,1,1)[12]                    : -11663.45
##  ARIMA(3,1,0)(2,1,0)[12]                    : -11651.03
##  ARIMA(3,1,1)(0,1,0)[12]                    : -11516.81
##  ARIMA(3,1,1)(0,1,1)[12]                    : -11721.9
##  ARIMA(3,1,1)(1,1,0)[12]                    : -11628.14
##  ARIMA(3,1,2)(0,1,0)[12]                    : -11543.15
##  ARIMA(4,1,0)(0,1,0)[12]                    : -11516.39
##  ARIMA(4,1,0)(0,1,1)[12]                    : -11685.31
##  ARIMA(4,1,0)(1,1,0)[12]                    : -11622.67
##  ARIMA(4,1,1)(0,1,0)[12]                    : -11514.84
##  ARIMA(5,1,0)(0,1,0)[12]                    : -11514.9
## 
## 
## 
##  Best model: ARIMA(1,1,1)(0,1,1)[12]
summary(model_auto)
## Series: ts_transformed 
## ARIMA(1,1,1)(0,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1     sma1
##       0.9749  -0.7212  -0.7520
## s.e.  0.0066   0.0211   0.0336
## 
## sigma^2 = 2.589e-06:  log likelihood = 5865.88
## AIC=-11723.77   AICc=-11723.71   BIC=-11705.44
## 
## Training set error measures:
##                         ME      RMSE          MAE           MPE        MAPE
## Training set -4.393962e-05 0.0015915 0.0001346049 -0.0003653908 0.001114708
##                    MASE      ACF1
## Training set 0.01170833 0.1525042

Estimasi Parameter

summary(model_auto)
## Series: ts_transformed 
## ARIMA(1,1,1)(0,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1     sma1
##       0.9749  -0.7212  -0.7520
## s.e.  0.0066   0.0211   0.0336
## 
## sigma^2 = 2.589e-06:  log likelihood = 5865.88
## AIC=-11723.77   AICc=-11723.71   BIC=-11705.44
## 
## Training set error measures:
##                         ME      RMSE          MAE           MPE        MAPE
## Training set -4.393962e-05 0.0015915 0.0001346049 -0.0003653908 0.001114708
##                    MASE      ACF1
## Training set 0.01170833 0.1525042

Uji Deterministik

checkresiduals(model_auto)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)(0,1,1)[12]
## Q* = 45.532, df = 21, p-value = 0.001474
## 
## Model df: 3.   Total lags used: 24
res_model = residuals(model_auto)
acf(res_model)

qqnorm(res_model)
qqline(res_model)

Forecasting & Evaluasi

plot(ts_transformed, col='blue')
lines(fitted.values(model_auto), col='red')

library(forecast)
library(ggplot2)
library(dplyr)

# Forecast sepanjang ts_test
h <- length(ts_test)
forecast_model <- forecast(model_auto, h = h)

# Kembalikan forecast dari skala log ke asli
forecast_original <- exp(forecast_model$mean)

# Data frame untuk plotting
df_train <- data.frame(
  Time = time(ts_data),
  Value = exp(as.numeric(ts_data)),  # Kembalikan train juga dari log
  Type = "Train"
)

df_test_plot <- data.frame(
  Time = time(ts_test),
  Value = as.numeric(ts_test),
  Type = "Test"
)

df_forecast <- data.frame(
  Time = time(ts_test),
  Value = as.numeric(forecast_original),
  Type = "Forecast"
)

# Gabungkan semua data untuk plot
df_all <- bind_rows(df_train, df_test_plot, df_forecast)

# Plot
ggplot(df_all, aes(x = Time, y = Value, color = Type)) +
  geom_line(size = 1) +
  labs(title = "Forecast vs Data Asli (SARIMA(1,0,1)(1,1,0)[12])",
       y = "Sales", x = "Time", color = "Keterangan") +
  scale_color_manual(values = c("Train" = "black", "Test" = "red", "Forecast" = "blue")) +
  theme_minimal() +
  theme(plot.title = element_text(size = 14, face = "bold"))

# Evaluasi (dalam skala asli)
mse <- mean((ts_test - forecast_original)^2)
rmse <- sqrt(mse)
mape <- mean(abs((ts_test - forecast_original) / ts_test)) * 100

# Ambil AIC dan BIC dari model
aic <- AIC(model_auto)
bic <- BIC(model_auto)

# Tampilkan hasil evaluasi
cat("Model: SARIMA(1,1,1)(0,1,1)[12]\n")
## Model: SARIMA(1,1,1)(0,1,1)[12]
cat("AIC  :", round(aic, 3), "\n")
## AIC  : -11723.76
cat("BIC  :", round(bic, 3), "\n")
## BIC  : -11705.44
cat("MSE  :", round(mse, 2), "\n")
## MSE  : 124496.5
cat("RMSE :", round(rmse, 2), "\n")
## RMSE : 352.84
cat("MAPE :", round(mape, 2), "%\n")
## MAPE : 0.09 %
df_train <- data.frame(
  Time = time(ts_transformed),  # waktu dari data train
  Value = exp(as.numeric(ts_transformed)),  # kembalikan dari log
  Type = "Train"
)

df_test <- data.frame(
  Time = time(ts_test),
  Value = as.numeric(ts_test),
  Type = "Test (Aktual)"
)

df_forecast <- data.frame(
  Time = time(ts_test),
  Value = as.numeric(exp(forecast_model$mean)),
  Type = "Test (Forecast)"
)

df_all <- bind_rows(df_train, df_test, df_forecast)

ggplot(df_all, aes(x = Time, y = Value, color = Type)) +
  geom_line(size = 1.2) +
  labs(
    title = "Visualisasi Forecast vs Data Aktual",
    x = "Waktu",
    y = "Sales (Skala Asli)",
    color = "Keterangan"
  ) +
  scale_color_manual(values = c(
    "Train" = "black",
    "Test (Aktual)" = "red",
    "Test (Forecast)" = "blue"
  )) +
  theme_minimal() +
  theme(plot.title = element_text(size = 14, face = "bold"))

# Latih model pada seluruh dataset (bukan hanya 90%)
model_auto_full <- auto.arima(dataset, seasonal = TRUE)

# Forecast 12 periode ke depan (misalnya bulan)
forecast_future <- forecast(model_auto_full, h = 24)

# Visualisasi hasil forecasting
autoplot(forecast_future) +
  labs(title = "Forecasting 24 bulan ke Depan dari Data Terakhir",
       x = "Tahun", y = "Nilai")

# Konversi hasil forecast ke dalam bentuk data frame
forecast_df <- as.data.frame(forecast_future)

# Tampilkan 24 hasil forecast (termasuk interval prediksi)
print(forecast_df)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 2020       330460.6 330429.0 330492.3 330412.2 330509.0
## Feb 2020       330595.5 330542.3 330648.7 330514.2 330676.9
## Mar 2020       330726.3 330650.8 330801.8 330610.8 330841.8
## Apr 2020       330866.2 330767.0 330965.3 330714.5 331017.8
## May 2020       331019.1 330894.9 331143.3 330829.2 331209.0
## Jun 2020       331185.8 331035.3 331336.3 330955.7 331416.0
## Jul 2020       331364.9 331186.9 331543.0 331092.7 331637.2
## Aug 2020       331560.1 331353.4 331766.8 331244.0 331876.2
## Sep 2020       331758.4 331522.0 331994.8 331396.8 332119.9
## Oct 2020       331954.3 331687.2 332221.4 331545.8 332362.8
## Nov 2020       332129.6 331830.9 332428.2 331672.8 332586.3
## Dec 2020       332287.3 331956.2 332618.3 331781.0 332793.6
## Jan 2021       332439.5 332072.5 332806.5 331878.3 333000.8
## Feb 2021       332575.4 332171.1 332979.6 331957.1 333193.6
## Mar 2021       332707.5 332264.8 333150.2 332030.5 333384.5
## Apr 2021       332848.3 332366.1 333330.4 332110.8 333585.7
## May 2021       333002.3 332479.6 333524.9 332203.0 333801.6
## Jun 2021       333170.4 332606.4 333734.4 332307.8 334033.0
## Jul 2021       333350.7 332744.5 333956.9 332423.6 334277.8
## Aug 2021       333547.1 332898.0 334196.2 332554.4 334539.8
## Sep 2021       333746.9 333054.2 334439.6 332687.5 334806.3
## Oct 2021       333944.1 333207.2 334681.1 332817.1 335071.2
## Nov 2021       334120.8 333339.1 334902.6 332925.2 335316.4
## Dec 2021       334279.9 333452.8 335107.0 333015.0 335544.8
prediksi_24bulan <- forecast_df$`Point Forecast`
print(prediksi_24bulan)
##  [1] 330460.6 330595.5 330726.3 330866.2 331019.1 331185.8 331364.9 331560.1
##  [9] 331758.4 331954.3 332129.6 332287.3 332439.5 332575.4 332707.5 332848.3
## [17] 333002.3 333170.4 333350.7 333547.1 333746.9 333944.1 334120.8 334279.9