Dosen Pengampu: Prof. Dr. Suhartono, M.Kom

Mata Kuliah: Linear Algebra

Prodi: Teknik Informatika

Lembaga: Universitas Islam Negeri Maulana Malik Ibrahim Malang

1 Penjelasan Tentang Smooting Splines

Smoothing splines adalah pendekatan yang ampuh untuk memperkirakan hubungan fungsional antara prediktor X dan respons Y. Smoothing splines dapat ditampung baik menggunakan fungsi smooth.spline (dalam paket stats) atau fungsi ss (dalam paket npreg). Dokumen ini memberikan latar belakang teoritis tentang smoothing splines, serta contoh-contoh yang menggambarkan bagaimana menggunakan fungsi smooth.spline dan ss. Seperti yang saya tunjukkan dalam tutorial ini, kedua fungsi memiliki sintaks yang sangat mirip, tetapi fungsi ss menawarkan beberapa opsi tambahan. Dibandingkan dengan fungsi smooth.spline, fungsi ss memiliki

lebih banyak metode pemilihan parameter penghalusan lebih banyak jenis spline (linier, kubik, quintic) opsi untuk batasan periodisitas opsi untuk nilai simpul yang ditentukan pengguna ringkasan yang sesuai dan metode plot

Untuk melihat bagaimana kinerja fungsi-fungsi ini dalam praktiknya, mari kita lihat contoh simulasinya. Secara khusus, mari simulasikan beberapa data dengan hubungan fungsional (berkala) yang memiliki beberapa gangguan. Berikut Analisa Data dengan Regresi Nonparametrik dengan Pendekatan Smooting Spline Pada Data Google Mobility Index dan Covid-19 di Jakarta Agustus 2020.

2 Data Google Mobility Index dan Covid-19 di Jakarta Agustus 2020

## Import the data
library(readxl)
datacovidmobility <- read_excel(path = "Data_Covid_DKI_Agustus.xlsx")
datacovidmobility
plot(datacovidmobility$Tanggal,datacovidmobility$POSITIF)

3 Data Deskriptif

library(jmv)
## Warning: package 'jmv' was built under R version 4.1.3
# Use the descritptives function to get the descritptive data
descriptives(datacovidmobility, vars = vars(POSITIF, retail_and_recreation_percent_change_from_baseline), freq = TRUE)
## 
##  DESCRIPTIVES
## 
##  Descriptives                                                                             
##  ---------------------------------------------------------------------------------------- 
##                          POSITIF     retail_and_recreation_percent_change_from_baseline   
##  ---------------------------------------------------------------------------------------- 
##    N                           31                                                    31   
##    Missing                      0                                                     0   
##    Mean                  29854.23                                             -28.32258   
##    Median                29554.00                                             -27.00000   
##    Standard deviation    5474.232                                              4.061093   
##    Minimum               21575.00                                             -37.00000   
##    Maximum               40309.00                                             -21.00000   
##  ----------------------------------------------------------------------------------------
hist(datacovidmobility$POSITIF, main="Distribusi Positif di RS untuk Covid 19", xlab="Positif di RS Untuk Covid 19", xlim=c(3000,6000),ylim=c(0,15)) # histogram variabel wt
mean=mean(datacovidmobility$POSITIF) # Menghitung nilai mean variabel wt
abline(v=mean, col="green",lwd=2) # Menambahkan garis vertical pada plot

# 2. Density plot
d<-density(datacovidmobility$POSITIF) # menghitung density variabel wt
plot(d,  xlab="Positif di RS untuk Covid 19", main="Distribusi Positif di RS untuk Covid 19")

4 Perbandingan Pendekatan

library(npreg)
## Warning: package 'npreg' was built under R version 4.1.3
# fit using ss
mod.ss <- ss( datacovidmobility$Tanggal,datacovidmobility$POSITIF, nknots = 10)
mod.ss
## 
## Call:
## ss(x = datacovidmobility$Tanggal, y = datacovidmobility$POSITIF, 
##     nknots = 10)
## 
## Smoothing Parameter  spar = 0.1570775   lambda = 8.130566e-07
## Equivalent Degrees of Freedom (Df) 9.619984
## Penalized Criterion (RSS) 77336.97
## Generalized Cross-Validation (GCV) 5244.846
# fit using smooth.spline
mod.smsp <- smooth.spline( datacovidmobility$Tanggal,datacovidmobility$POSITIF, nknots = 10)
mod.smsp
## Call:
## smooth.spline(x = datacovidmobility$Tanggal, y = datacovidmobility$POSITIF, 
##     nknots = 10)
## 
## Smoothing Parameter  spar= 0.09083055  lambda= 1.184222e-05 (16 iterations)
## Equivalent Degrees of Freedom (Df): 10.58814
## Penalized Criterion (RSS): 66356.69
## GCV: 4937.205
# rmse between solutions
sqrt(mean(( mod.ss$y - mod.smsp$y )^2))
## [1] 10.33558
# rmse between solutions and f(x)
sqrt(mean(( datacovidmobility$POSITIF - mod.ss$y )^2))
## [1] 49.94738
sqrt(mean(( datacovidmobility$POSITIF - mod.smsp$y )^2))
## [1] 46.26595
datacovidmobility$prediksi_ss <- mod.ss$y
datacovidmobility$prediksi_smsp <- mod.smsp$y
datacovidmobility
# plot results
plot(datacovidmobility$Tanggal, datacovidmobility$POSITIF)
lines(datacovidmobility$Tanggal, datacovidmobility$prediksi_ss , lty = 2, col = 'black', lwd = 4)
lines(datacovidmobility$Tanggal, datacovidmobility$prediksi_smsp, lty = 4, col = 'pink', lwd = 2)

# plot method
plot(datacovidmobility$Tanggal, datacovidmobility$POSITIF, lty = 10, col = 'black', lwd =5)
#plot(mod.ss)
# add lm fit
abline(coef(lm( datacovidmobility$POSITIF ~ datacovidmobility$Tanggal  , data = datacovidmobility)), lty = 10, col = 'blue', lwd =5)
rug(datacovidmobility$Tanggal)  # add rug to plot
#points(datacovidmobility$Tanggal, datacovidmobility$Dirawat, lty = 2, col = 2, lwd = 2)
points(datacovidmobility$Tanggal,mod.ss$y , lty = 2, col = 'red', lwd = 4)
#points(datacovidmobility$Tanggal,mod.smsp$y , lty = 2, col = 4, lwd = 2)
legend("topright", 
       legend = c("Real", "Model SS", "Trends"), 
       lty = 1:3, col = c("Black","Red","Blue"), lwd = 3, bty = "p")

library(npreg)
# fit using ss
mod.ss1 <- ss( datacovidmobility$Tanggal,datacovidmobility$retail_and_recreation_percent_change_from_baseline, nknots = 10)
mod.ss1
## 
## Call:
## ss(x = datacovidmobility$Tanggal, y = datacovidmobility$retail_and_recreation_percent_change_from_baseline, 
##     nknots = 10)
## 
## Smoothing Parameter  spar = 0.3686358   lambda = 2.74518e-05
## Equivalent Degrees of Freedom (Df) 5.788914
## Penalized Criterion (RSS) 249.0038
## Generalized Cross-Validation (GCV) 12.14464
# fit using smooth.spline
mod.smsp1 <- smooth.spline( datacovidmobility$Tanggal,datacovidmobility$retail_and_recreation_percent_change_from_baseline, nknots = 10)
mod.smsp1
## Call:
## smooth.spline(x = datacovidmobility$Tanggal, y = datacovidmobility$retail_and_recreation_percent_change_from_baseline, 
##     nknots = 10)
## 
## Smoothing Parameter  spar= 0.3479396  lambda= 0.0008521042 (10 iterations)
## Equivalent Degrees of Freedom (Df): 5.797622
## Penalized Criterion (RSS): 248.853
## GCV: 12.14567
datacovidmobility$prediksi_ss1 <- mod.ss1$y
datacovidmobility$prediksi_smsp1 <- mod.smsp1$y
datacovidmobility
# plot results
plot(datacovidmobility$Tanggal, datacovidmobility$retail_and_recreation_percent_change_from_baseline, lty = 10, col = 'black', lwd =5)
# add lm fit
abline(coef(lm( datacovidmobility$retail_and_recreation_percent_change_from_baseline ~ datacovidmobility$Tanggal  , data = datacovidmobility)), lty = 10, col = 'blue', lwd =5)
lines(datacovidmobility$Tanggal, datacovidmobility$prediksi_ss1 , lty = 2, col = 'black', lwd = 4)
lines(datacovidmobility$Tanggal, datacovidmobility$prediksi_smsp1, lty = 4, col = 'red', lwd = 2)

5 Korelasi Pearson

Untuk melakukan analisis korelasi Pearson cukup sederhana. Semisal kita ingin mengetahui hubungan antara mobilityjakartaPOSITIFdanmobilityjakartaretail_and_recreation_percent_change_from_baseline,

cor.test(datacovidmobility$POSITIF,datacovidmobility$retail_and_recreation_percent_change_from_baseline)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidmobility$POSITIF and datacovidmobility$retail_and_recreation_percent_change_from_baseline
## t = 1.9056, df = 29, p-value = 0.06666
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02353599  0.61520699
## sample estimates:
##       cor 
## 0.3335862

Dari output tersebut dapat kita simpulkan bahwa tidak ada hubungan yang signifikan antara mobilityjakartaPOSITIF dan mobilityjakartaretail_and_recreation_percent_change_from_baseline (p-value<0,01). Nilai p-value dalam output R dituliskan 0.06666 Nilai koefisien korelasi r adalah sebesar 0.3335862 yang menunjukkan hubungan yang lemah dan terbalik.

6 Hasil Regresi dengan Pendekatan Smoothing Spline

model <- lm(datacovidmobility$POSITIF ~ datacovidmobility$retail_and_recreation_percent_change_from_baseline, data = datacovidmobility)
summary(model)
## 
## Call:
## lm(formula = datacovidmobility$POSITIF ~ datacovidmobility$retail_and_recreation_percent_change_from_baseline, 
##     data = datacovidmobility)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8005.9 -4187.8   263.4  4081.7  9280.7 
## 
## Coefficients:
##                                                                      Estimate
## (Intercept)                                                           42589.9
## datacovidmobility$retail_and_recreation_percent_change_from_baseline    449.7
##                                                                      Std. Error
## (Intercept)                                                              6749.5
## datacovidmobility$retail_and_recreation_percent_change_from_baseline      236.0
##                                                                      t value
## (Intercept)                                                            6.310
## datacovidmobility$retail_and_recreation_percent_change_from_baseline   1.906
##                                                                      Pr(>|t|)
## (Intercept)                                                          6.83e-07
## datacovidmobility$retail_and_recreation_percent_change_from_baseline   0.0667
##                                                                         
## (Intercept)                                                          ***
## datacovidmobility$retail_and_recreation_percent_change_from_baseline .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5249 on 29 degrees of freedom
## Multiple R-squared:  0.1113, Adjusted R-squared:  0.08063 
## F-statistic: 3.631 on 1 and 29 DF,  p-value: 0.06666

Rumus linear regresi menjadi y = 42589.9 + (449.7) datacovidmobility$retail_and_recreation_percent_change_from_baseline

model$coefficients
##                                                          (Intercept) 
##                                                           42589.8773 
## datacovidmobility$retail_and_recreation_percent_change_from_baseline 
##                                                             449.6642
model$fitted.values
##        1        2        3        4        5        6        7        8 
## 25952.30 25952.30 30448.94 29999.28 30448.94 30448.94 32247.60 30448.94 
##        9       10       11       12       13       14       15       16 
## 28200.62 31348.27 29999.28 30898.61 28200.62 32247.60 29999.28 27750.96 
##       17       18       19       20       21       22       23       24 
## 28200.62 28650.29 30898.61 28200.62 29549.61 28200.62 26851.63 30448.94 
##       25       26       27       28       29       30       31 
## 30898.61 30898.61 31348.27 33146.93 31348.27 29999.28 32247.60
datacovidmobility$prediksi_linear <- model$fitted.values
datacovidmobility
# Function to add histograms
panel.hist <- function(x, ...) {
    usr <- par("usr")
    on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5))
    his <- hist(x, plot = FALSE)
    breaks <- his$breaks
    nB <- length(breaks)
    y <- his$counts
    y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1], y, col = rgb(0, 1, 1, alpha = 0.5), ...)
    # lines(density(x), col = 2, lwd = 2) # Uncomment to add density lines
}
# Equivalent with a formula
pairs(datacovidmobility$POSITIF~datacovidmobility$retail_and_recreation_percent_change_from_baseline, data = datacovidmobility,
      upper.panel = NULL,         # Disabling the upper panel
      diag.panel = panel.hist)    # Adding the histograms

# plot method
plot(datacovidmobility$Tanggal, datacovidmobility$POSITIF, lty = 10, col = 'black', lwd =5)
#plot(mod.ss)
# add lm fit
abline(coef(lm( datacovidmobility$POSITIF ~ datacovidmobility$Tanggal  , data = datacovidmobility)), lty = 10, col = 'blue', lwd =5)
rug(datacovidmobility$Tanggal)  # add rug to plot
#points(datacovidmobility$Tanggal, datacovidmobility$Dirawat, lty = 2, col = 2, lwd = 2)
lines(datacovidmobility$Tanggal,mod.ss$y , lty = 2, col = 'red', lwd = 4)
#points(datacovidmobility$Tanggal,mod.smsp$y , lty = 2, col = 4, lwd = 2)
lines(datacovidmobility$Tanggal, datacovidmobility$prediksi_linear, lty = 2, col = 6, lwd = 4)
legend("topright", 
       legend = c("Real", "Model SS", "Trends", "Linear Regresi"), 
       lty = 1:4, col = c("Black","Red","Blue","Pink"), lwd = 3, bty = "p")