Tugas Kelompok “Membuat Regresi Linier Berganda Menggunakan Data Covid Pasien Sembuh DKI Jakarta dan Google Mobility Index pada Bulan Desember 2020”

Hafiz Daniswara

06 April 2022

Regresi linier merupakan metode yang digunakan untuk memperoleh hubungan antara satu variabel dependen dengan satu atau lebih variabel independen. Apabila variabel independen yang digunakan hanya satu, maka disebut regresi linier sederhana, sedangkan apabila lebih dari satu disebut regresi linier berganda. Berikut contoh regresi linier berganda pada data positif Covid-19 & Google Mobility Index pada bulan Desember 2020.

Data Sembuh Positif Covid-19 dan Google Mobility Index di Jakarta pada Bulan Desember 2020

library(readxl)
# Read the Jakarta Covid-19 / Google Mobility workbook from the working
# directory and display the resulting tibble.
datainflowjakarta <- read_excel("datainflowjakarta.xlsx")
print(datainflowjakarta)
## # A tibble: 31 x 12
##    Tanggal             POSITIF Dirawat Sembuh Meninggal `Self Isolation`
##    <dttm>                <dbl>   <dbl>  <dbl>     <dbl>            <dbl>
##  1 2020-12-31 00:00:00  183735    5821 164881      3287             9746
##  2 2020-12-30 00:00:00  181713    5654 162911      3266             9882
##  3 2020-12-29 00:00:00  179660    5059 161337      3246            10018
##  4 2020-12-28 00:00:00  177604    4581 159878      3226             9919
##  5 2020-12-27 00:00:00  175926    4452 158615      3204             9655
##  6 2020-12-26 00:00:00  173929    4510 156798      3182             9439
##  7 2020-12-25 00:00:00  171871    5742 154242      3167             8720
##  8 2020-12-24 00:00:00  169775    5305 152491      3146             8833
##  9 2020-12-23 00:00:00  167842    4947 151122      3130             8643
## 10 2020-12-22 00:00:00  165888    4462 149691      3115             8620
## # ... with 21 more rows, and 6 more variables:
## #   retail_and_recreation_percent_change_from_baseline <dbl>,
## #   grocery_and_pharmacy_percent_change_from_baseline <dbl>,
## #   parks_percent_change_from_baseline <dbl>,
## #   transit_stations_percent_change_from_baseline <dbl>,
## #   workplaces_percent_change_from_baseline <dbl>,
## #   residential_percent_change_from_baseline <dbl>
# Per-column summary (min, quartiles, median, mean, max) of the loaded data.
summary(datainflowjakarta)
##     Tanggal                       POSITIF          Dirawat         Sembuh      
##  Min.   :2020-12-01 00:00:00   Min.   :137919   Min.   :1861   Min.   :125102  
##  1st Qu.:2020-12-08 12:00:00   1st Qu.:147220   1st Qu.:2297   1st Qu.:132783  
##  Median :2020-12-16 00:00:00   Median :156343   Median :3705   Median :141365  
##  Mean   :2020-12-16 00:00:00   Mean   :158331   Mean   :3725   Mean   :142770  
##  3rd Qu.:2020-12-23 12:00:00   3rd Qu.:168809   3rd Qu.:4944   3rd Qu.:151807  
##  Max.   :2020-12-31 00:00:00   Max.   :183735   Max.   :5821   Max.   :164881  
##    Meninggal    Self Isolation 
##  Min.   :2689   Min.   : 7673  
##  1st Qu.:2851   1st Qu.: 8232  
##  Median :3010   Median : 8720  
##  Mean   :2997   Mean   : 8840  
##  3rd Qu.:3138   3rd Qu.: 9456  
##  Max.   :3287   Max.   :10018  
##  retail_and_recreation_percent_change_from_baseline
##  Min.   :-36.00                                    
##  1st Qu.:-28.50                                    
##  Median :-27.00                                    
##  Mean   :-27.48                                    
##  3rd Qu.:-25.00                                    
##  Max.   :-23.00                                    
##  grocery_and_pharmacy_percent_change_from_baseline
##  Min.   :-17.000                                  
##  1st Qu.: -8.500                                  
##  Median : -6.000                                  
##  Mean   : -6.548                                  
##  3rd Qu.: -3.500                                  
##  Max.   :  1.000                                  
##  parks_percent_change_from_baseline
##  Min.   :-59.00                    
##  1st Qu.:-49.50                    
##  Median :-47.00                    
##  Mean   :-47.52                    
##  3rd Qu.:-45.50                    
##  Max.   :-37.00                    
##  transit_stations_percent_change_from_baseline
##  Min.   :-53.00                               
##  1st Qu.:-39.00                               
##  Median :-36.00                               
##  Mean   :-36.39                               
##  3rd Qu.:-33.50                               
##  Max.   :-27.00                               
##  workplaces_percent_change_from_baseline
##  Min.   :-66.00                         
##  1st Qu.:-32.50                         
##  Median :-30.00                         
##  Mean   :-30.77                         
##  3rd Qu.:-27.50                         
##  Max.   :-10.00                         
##  residential_percent_change_from_baseline
##  Min.   : 6.00                           
##  1st Qu.:10.50                           
##  Median :12.00                           
##  Mean   :11.87                           
##  3rd Qu.:13.00                           
##  Max.   :22.00
# Scatterplot matrix of every column against every other column.
pairs(datainflowjakarta)

# Same matrix with the lower panels suppressed, leaving the upper triangle.
pairs(datainflowjakarta, lower.panel = NULL)

# Recovered count over time. Columns are written bare so that `data =` is what
# supplies them and the axes are labelled with the plain column names.
plot(Sembuh ~ Tanggal, data = datainflowjakarta)

# Recovered count against each Google Mobility category; plot() draws one
# scatterplot per term on the right-hand side of the formula.
plot(
  Sembuh ~ retail_and_recreation_percent_change_from_baseline +
    grocery_and_pharmacy_percent_change_from_baseline +
    parks_percent_change_from_baseline +
    transit_stations_percent_change_from_baseline +
    workplaces_percent_change_from_baseline +
    residential_percent_change_from_baseline,
  data = datainflowjakarta
)

# Correlation of the recovered count (Sembuh) with each Google Mobility
# category, evaluated inside the data frame so columns are named directly.
with(
  datainflowjakarta,
  cor(Sembuh, retail_and_recreation_percent_change_from_baseline)
)
## [1] 0.4534586
with(
  datainflowjakarta,
  cor(Sembuh, grocery_and_pharmacy_percent_change_from_baseline)
)
## [1] 0.3076185
with(
  datainflowjakarta,
  cor(Sembuh, parks_percent_change_from_baseline)
)
## [1] -0.05979146
with(
  datainflowjakarta,
  cor(Sembuh, transit_stations_percent_change_from_baseline)
)
## [1] 0.1504269
with(
  datainflowjakarta,
  cor(Sembuh, workplaces_percent_change_from_baseline)
)
## [1] 0.2328853
with(
  datainflowjakarta,
  cor(Sembuh, residential_percent_change_from_baseline)
)
## [1] -0.2366697
# Least-squares fit of the recovered count (Sembuh) on the date (Tanggal) only.
# Tanggal is a date-time column (<dttm> in the tibble printout), so the slope
# is expressed per second: 1.520e-02 * 86400 is roughly 1313 per day.
# NOTE(review): the variables are written as datainflowjakarta$..., so `data =`
# is not what supplies them; a bare `Sembuh ~ Tanggal` would be needed for
# predict(model, newdata = ...) to work. Changing it would alter the recorded
# Call and coefficient names, so the call is left untouched here.
# NOTE(review): the document title announces a multiple regression with the
# Google Mobility Index, but no mobility column enters this model -- confirm
# whether those predictors were meant to be included.
model <- lm(datainflowjakarta$Sembuh ~ datainflowjakarta$Tanggal, data = datainflowjakarta)

# Coefficient table, residual quantiles, R-squared and the overall F test.
summary(model)
## 
## Call:
## lm(formula = datainflowjakarta$Sembuh ~ datainflowjakarta$Tanggal, 
##     data = datainflowjakarta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1643.9 -1046.9  -347.6  1194.4  2411.7 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -2.430e+07  4.720e+05  -51.48   <2e-16 ***
## datainflowjakarta$Tanggal  1.520e-02  2.935e-04   51.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1263 on 29 degrees of freedom
## Multiple R-squared:  0.9893, Adjusted R-squared:  0.9889 
## F-statistic:  2682 on 1 and 29 DF,  p-value: < 2.2e-16
# Analysis-of-variance table: sums of squares for the Tanggal term and the
# residuals of the fitted model.
anova(model)
## Analysis of Variance Table
## 
## Response: datainflowjakarta$Sembuh
##                           Df     Sum Sq    Mean Sq F value    Pr(>F)    
## datainflowjakarta$Tanggal  1 4277268385 4277268385  2681.9 < 2.2e-16 ***
## Residuals                 29   46251260    1594871                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Recovered count against date with the fitted regression line on top.
# Bare column names are used so `data =` actually supplies the variables and
# the axes are labelled "Tanggal" / "Sembuh" instead of the `$` expressions.
plot(
  Sembuh ~ Tanggal,
  data = datainflowjakarta, col = "Salmon", pch = 20, cex = 1.5,
  main = "Data Covid Pasien Sembuh di DKI Jakarta dan Google Mobility Index"
)
abline(model)

# Cook's distance per observation, to spot influential rows.
plot(cooks.distance(model), pch = 16, col = "Magenta")

# Built-in diagnostic plots for the fitted lm object.
plot(model)

# Akaike and Bayesian information criteria of the fitted model.
AIC(model)
## [1] 534.6582
BIC(model)
## [1] 538.9601
# Fitted values for the first 11 rows of the data.
head(predict(model), n = 11)
##        1        2        3        4        5        6        7        8 
## 162469.3 161156.0 159842.7 158529.5 157216.2 155902.9 154589.6 153276.3 
##        9       10       11 
## 151963.1 150649.8 149336.5
# Index plot of the first fitted values.
# NOTE(review): this plots 10 values while the printed fitted values and
# residuals use n = 11 -- confirm which count was intended.
plot(head(predict(model), n = 10))

# Residuals of the fitted model for the first 11 rows.
head(resid(model), n = 11)
##          1          2          3          4          5          6          7 
##  2411.6996  1754.9798  1494.2601  1348.5403  1398.8206   895.1008  -347.6190 
##          8          9         10         11 
##  -785.3387  -841.0585  -958.7782 -1028.4980
# Intercept and Tanggal slope of the fitted model.
coef(model)
##               (Intercept) datainflowjakarta$Tanggal 
##                 -2.43e+07                  1.52e-02
# Store the model's residuals and fitted values as two new columns of the
# data, then display the widened tibble.
datainflowjakarta[["residuals"]] <- resid(model)
datainflowjakarta[["predicted"]] <- fitted(model)
print(datainflowjakarta)
## # A tibble: 31 x 14
##    Tanggal             POSITIF Dirawat Sembuh Meninggal `Self Isolation`
##    <dttm>                <dbl>   <dbl>  <dbl>     <dbl>            <dbl>
##  1 2020-12-31 00:00:00  183735    5821 164881      3287             9746
##  2 2020-12-30 00:00:00  181713    5654 162911      3266             9882
##  3 2020-12-29 00:00:00  179660    5059 161337      3246            10018
##  4 2020-12-28 00:00:00  177604    4581 159878      3226             9919
##  5 2020-12-27 00:00:00  175926    4452 158615      3204             9655
##  6 2020-12-26 00:00:00  173929    4510 156798      3182             9439
##  7 2020-12-25 00:00:00  171871    5742 154242      3167             8720
##  8 2020-12-24 00:00:00  169775    5305 152491      3146             8833
##  9 2020-12-23 00:00:00  167842    4947 151122      3130             8643
## 10 2020-12-22 00:00:00  165888    4462 149691      3115             8620
## # ... with 21 more rows, and 8 more variables:
## #   retail_and_recreation_percent_change_from_baseline <dbl>,
## #   grocery_and_pharmacy_percent_change_from_baseline <dbl>,
## #   parks_percent_change_from_baseline <dbl>,
## #   transit_stations_percent_change_from_baseline <dbl>,
## #   workplaces_percent_change_from_baseline <dbl>,
## #   residential_percent_change_from_baseline <dbl>, residuals <dbl>, ...
# Recovered count against date with a smoothed curve through the points.
scatter.smooth(
  x = datainflowjakarta$Tanggal, y = datainflowjakarta$Sembuh,
  main = "Tanggal ~ Sembuh"
)

# Boxplot of the recovered count. The outlier values are reported in the
# subtitle; passed as an unnamed argument they would be treated by boxplot()
# as a second data group and drawn as an extra (usually empty) box.
sembuh_outliers <- boxplot.stats(datainflowjakarta$Sembuh)$out
outlier_label <- if (length(sembuh_outliers) > 0) {
  paste(sembuh_outliers, collapse = ", ")
} else {
  "none"
}
boxplot(
  datainflowjakarta$Sembuh,
  main = "Sembuh",
  sub = paste("Outliers:", outlier_label)
)

# Kernel density estimate of the recovered count. The vertical axis of a
# density() plot is a density, not a frequency, so it is labelled as such.
plot(
  density(datainflowjakarta$Sembuh),
  main = "Google Mobility Index: Sembuh",
  ylab = "Density"
)

# Redraw the scatterplot and overlay the fitted line from its coefficients
# (abline() accepts a length-2 intercept/slope vector).
coefs <- coef(model)
plot(Sembuh ~ Tanggal, data = datainflowjakarta)
abline(coefs)

# Label the line with its equation. legend() anchors the text to a corner of
# the plot region; fixed coordinates such as (12, 10) lie far outside the
# plotted ranges (Sembuh runs from 125102 to 164881), so a label placed there
# is never visible. The slope is about 0.0152, so significant digits are used
# instead of two decimals, and the predictor is named Tanggal to match the
# model.
fitted_line <- paste(
  "Sembuh =", signif(coefs[1], 4), "+", signif(coefs[2], 4), "* Tanggal"
)
legend("topleft", legend = fitted_line, bty = "n")

# Correlation test (estimate, t statistic, p-value, 95% CI) between the
# retail-and-recreation mobility change and the recovered count.
# NOTE(review): Sembuh looks cumulative (it rises on every printed date), so
# this may largely reflect a shared time trend -- confirm before interpreting.
cor.test(datainflowjakarta$
 retail_and_recreation_percent_change_from_baseline, 
         datainflowjakarta$Sembuh) 
## 
##  Pearson's product-moment correlation
## 
## data:  datainflowjakarta$retail_and_recreation_percent_change_from_baseline and datainflowjakarta$Sembuh
## t = 2.7398, df = 29, p-value = 0.01041
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1180936 0.6959711
## sample estimates:
##       cor 
## 0.4534586
# Correlation test between the grocery-and-pharmacy mobility change and the
# recovered count.
cor.test(datainflowjakarta$
 grocery_and_pharmacy_percent_change_from_baseline, 
         datainflowjakarta$Sembuh)
## 
##  Pearson's product-moment correlation
## 
## data:  datainflowjakarta$grocery_and_pharmacy_percent_change_from_baseline and datainflowjakarta$Sembuh
## t = 1.741, df = 29, p-value = 0.09229
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.05243742  0.59689599
## sample estimates:
##       cor 
## 0.3076185
# Correlation test between the parks mobility change and the recovered count.
cor.test(datainflowjakarta$
 parks_percent_change_from_baseline, 
         datainflowjakarta$Sembuh)
## 
##  Pearson's product-moment correlation
## 
## data:  datainflowjakarta$parks_percent_change_from_baseline and datainflowjakarta$Sembuh
## t = -0.32256, df = 29, p-value = 0.7493
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4055396  0.3009242
## sample estimates:
##         cor 
## -0.05979146
# Correlation test between the transit-stations mobility change and the
# recovered count.
cor.test(datainflowjakarta$
 transit_stations_percent_change_from_baseline, 
         datainflowjakarta$Sembuh)
## 
##  Pearson's product-moment correlation
## 
## data:  datainflowjakarta$transit_stations_percent_change_from_baseline and datainflowjakarta$Sembuh
## t = 0.8194, df = 29, p-value = 0.4192
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2153943  0.4792233
## sample estimates:
##       cor 
## 0.1504269
# Correlation test between the workplaces mobility change and the recovered
# count.
cor.test(datainflowjakarta$
 workplaces_percent_change_from_baseline, 
         datainflowjakarta$Sembuh)
## 
##  Pearson's product-moment correlation
## 
## data:  datainflowjakarta$workplaces_percent_change_from_baseline and datainflowjakarta$Sembuh
## t = 1.2896, df = 29, p-value = 0.2074
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1323788  0.5424612
## sample estimates:
##       cor 
## 0.2328853
# Correlation test between the residential mobility change and the recovered
# count.
cor.test(datainflowjakarta$
 residential_percent_change_from_baseline, 
         datainflowjakarta$Sembuh)
## 
##  Pearson's product-moment correlation
## 
## data:  datainflowjakarta$residential_percent_change_from_baseline and datainflowjakarta$Sembuh
## t = -1.3118, df = 29, p-value = 0.1999
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.5452817  0.1284417
## sample estimates:
##        cor 
## -0.2366697