1. Pengertian Regresi Linear Berganda

Regresi linear berganda adalah model regresi linear yang melibatkan lebih dari satu variabel bebas (prediktor). Dalam bahasa Inggris, istilah ini disebut multiple linear regression.

2. Mengimpor data dari Excel dan menampilkannya pada RStudio

library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
# Load the July 2021 Jakarta COVID workbook into a tibble and display it.
# NOTE(review): the path is an absolute location on the author's D: drive, so
# the script only runs on a machine with the same folder layout.
datacovidjuli2021 <- read_excel(
  "D:/Matkul Sem2/Linear Algebra/data covid positif jakarta bulan juli 2021.xlsx"
)
print(datacovidjuli2021)
## # A tibble: 31 x 8
##    Tanggal             POSITIF retail_and_recreation_pe~ grocery_and_pharmacy_p~
##    <dttm>                <dbl>                     <dbl>                   <dbl>
##  1 2021-07-01 00:00:00  551009                       -42                      -5
##  2 2021-07-02 00:00:00  560408                       -32                       2
##  3 2021-07-03 00:00:00  570110                       -34                      -1
##  4 2021-07-04 00:00:00  580595                       -34                      -2
##  5 2021-07-05 00:00:00  591498                       -37                      -6
##  6 2021-07-06 00:00:00  600937                       -31                       2
##  7 2021-07-07 00:00:00  610303                       -36                       2
##  8 2021-07-08 00:00:00  623277                       -39                      -5
##  9 2021-07-09 00:00:00  636389                       -32                      -2
## 10 2021-07-10 00:00:00  649309                       -35                      -6
## # ... with 21 more rows, and 4 more variables:
## #   parks_percent_change_from_baseline <dbl>,
## #   transit_stations_percent_change_from_baseline <dbl>,
## #   workplaces_percent_change_from_baseline <dbl>,
## #   residential_percent_change_from_baseline <dbl>

3. Mengetahui nilai minimum, kuartil pertama (Q1), median (Q2), rata-rata, kuartil ketiga (Q3), dan maksimum

# Per-column summary: minimum, quartiles, median, mean and maximum.
summary(object = datacovidjuli2021)
##     Tanggal                       POSITIF      
##  Min.   :2021-07-01 00:00:00   Min.   :551009  
##  1st Qu.:2021-07-08 12:00:00   1st Qu.:629833  
##  Median :2021-07-16 00:00:00   Median :727016  
##  Mean   :2021-07-16 00:00:00   Mean   :705205  
##  3rd Qu.:2021-07-23 12:00:00   3rd Qu.:782700  
##  Max.   :2021-07-31 00:00:00   Max.   :814653  
##  retail_and_recreation_percent_change_from_baseline
##  Min.   :-42.00                                    
##  1st Qu.:-34.00                                    
##  Median :-31.00                                    
##  Mean   :-31.16                                    
##  3rd Qu.:-28.00                                    
##  Max.   :-22.00                                    
##  grocery_and_pharmacy_percent_change_from_baseline
##  Min.   :-13.000                                  
##  1st Qu.: -5.500                                  
##  Median : -2.000                                  
##  Mean   : -2.161                                  
##  3rd Qu.:  0.000                                  
##  Max.   :  6.000                                  
##  parks_percent_change_from_baseline
##  Min.   :-67.0                     
##  1st Qu.:-54.0                     
##  Median :-50.0                     
##  Mean   :-50.9                     
##  3rd Qu.:-47.0                     
##  Max.   :-41.0                     
##  transit_stations_percent_change_from_baseline
##  Min.   :-61.00                               
##  1st Qu.:-51.00                               
##  Median :-47.00                               
##  Mean   :-47.84                               
##  3rd Qu.:-45.00                               
##  Max.   :-39.00                               
##  workplaces_percent_change_from_baseline
##  Min.   :-73.00                         
##  1st Qu.:-39.50                         
##  Median :-35.00                         
##  Mean   :-34.03                         
##  3rd Qu.:-23.50                         
##  Max.   :-14.00                         
##  residential_percent_change_from_baseline
##  Min.   : 7.00                           
##  1st Qu.:11.50                           
##  Median :14.00                           
##  Mean   :13.61                           
##  3rd Qu.:16.00                           
##  Max.   :22.00

4.Membuat Matriks scatterplot dengan Fungsi pairs()

# Scatter-plot matrix across all columns of the data set.
pairs(x = datacovidjuli2021)

Melakukan drop terhadap panel bawah grafik tersebut

# Same scatter-plot matrix, with the lower-triangle panels suppressed.
pairs(
  x = datacovidjuli2021,
  lower.panel = NULL
)

5.Visualisasi Data dengan Fungsi plot()

# Scatter plot of positive cases (y) against date (x).
# Columns are named directly in the formula and looked up through `data`;
# repeating `datacovidjuli2021$` inside the formula made `data` redundant and
# produced axis labels carrying the whole `datacovidjuli2021$...` expression.
plot(POSITIF ~ Tanggal, data = datacovidjuli2021)

Visualisasi data di mana data Positif Covid sebagai variabel Y dan Google Mobility Index sebagai variabel X

# Plot positive cases (y) against each of the six Google Mobility columns (x).
# A formula with several right-hand-side terms makes plot() draw one scatter
# plot per term. Columns are referenced by bare name and resolved through
# `data` instead of repeating `datacovidjuli2021$` in every term, which also
# gives each plot a readable axis label.
plot(
  POSITIF ~
    retail_and_recreation_percent_change_from_baseline +
    grocery_and_pharmacy_percent_change_from_baseline +
    parks_percent_change_from_baseline +
    transit_stations_percent_change_from_baseline +
    workplaces_percent_change_from_baseline +
    residential_percent_change_from_baseline,
  data = datacovidjuli2021
)

6.Korelasi Antar Variabel

korelasi adalah nilai yang menunjukkan kekuatan dan arah hubungan linier antara dua peubah acak.

a.Korelasi variabel y dengan x1

# Pearson correlation between positive cases (y) and retail/recreation (x1).
with(
  datacovidjuli2021,
  cor(POSITIF, retail_and_recreation_percent_change_from_baseline)
)
## [1] 0.725499

b. Korelasi variabel y dengan x2

# Pearson correlation between positive cases (y) and grocery/pharmacy (x2).
with(
  datacovidjuli2021,
  cor(POSITIF, grocery_and_pharmacy_percent_change_from_baseline)
)
## [1] 0.1305265

c. Korelasi variabel y dengan x3

# Pearson correlation between positive cases (y) and parks (x3).
with(
  datacovidjuli2021,
  cor(POSITIF, parks_percent_change_from_baseline)
)
## [1] 0.5144597

d. Korelasi variabel y dengan x4

# Pearson correlation between positive cases (y) and transit stations (x4).
with(
  datacovidjuli2021,
  cor(POSITIF, transit_stations_percent_change_from_baseline)
)
## [1] 0.5545586

e. Korelasi variabel y dengan x5

# Pearson correlation between positive cases (y) and workplaces (x5).
with(
  datacovidjuli2021,
  cor(POSITIF, workplaces_percent_change_from_baseline)
)
## [1] 0.1930649

f. Korelasi variabel y dengan x6

# Pearson correlation between positive cases (y) and residential (x6).
with(
  datacovidjuli2021,
  cor(POSITIF, residential_percent_change_from_baseline)
)
## [1] -0.3703477

7. Menggunakan Pemodelan dari Data Positif Covid

# Fit a linear regression of POSITIF on the single predictor Tanggal and
# print the fit summary.
# NOTE(review): the formula references the columns as `datacovidjuli2021$...`
# rather than by bare name, which is why the coefficient is labelled
# `datacovidjuli2021$Tanggal` in the printed output. It likely also prevents
# predict(model, newdata = ...) from using new dates -- confirm before reusing
# this model for forecasting.
model <- lm(
  formula = datacovidjuli2021$POSITIF ~ datacovidjuli2021$Tanggal,
  data = datacovidjuli2021
)
print(summary(model))
## 
## Call:
## lm(formula = datacovidjuli2021$POSITIF ~ datacovidjuli2021$Tanggal, 
##     data = datacovidjuli2021)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -31174.0 -11347.8    353.2  11756.5  22604.5 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -1.758e+08  5.680e+06  -30.95   <2e-16 ***
## datacovidjuli2021$Tanggal  1.085e-01  3.492e-03   31.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15030 on 29 degrees of freedom
## Multiple R-squared:  0.9708, Adjusted R-squared:  0.9698 
## F-statistic: 965.4 on 1 and 29 DF,  p-value: < 2.2e-16

8.menggunakan Fungsi anova()

# Analysis-of-variance table for the fitted model.
anova(object = model)
## Analysis of Variance Table
## 
## Response: datacovidjuli2021$POSITIF
##                           Df     Sum Sq    Mean Sq F value    Pr(>F)    
## datacovidjuli2021$Tanggal  1 2.1796e+11 2.1796e+11  965.38 < 2.2e-16 ***
## Residuals                 29 6.5475e+09 2.2578e+08                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

9.Membuat plot() Model dari Data Real dan Prediksi

# Observed positive cases over time (red points) with the fitted regression
# line on top. Columns are named directly in the formula and resolved through
# `data`, instead of repeating `datacovidjuli2021$` (which made `data`
# redundant and cluttered the axis labels).
plot(
  POSITIF ~ Tanggal,
  data = datacovidjuli2021,
  col = "red", pch = 20, cex = 1.5,
  main = "Data real dari Positif Covid Mobile Index"
)
abline(model) # Add the fitted regression line

Penjelasan grafik di atas: titik-titik berwarna merah adalah data real (nyata) yang diambil dari data kasus positif Covid bulan Juli 2021, sedangkan garis hitam adalah garis regresi yang menunjukkan data prediksi.

# Cook's distance for every observation, drawn as filled red points.
plot(cooks.distance(model), col = "red", pch = 16)

# Standard diagnostic plots for the fitted model.
plot(x = model)

10.Penggunaan AIC dan BIC

a.AIC

# Akaike information criterion of the fitted model.
AIC(object = model)
## [1] 688.1934

b.BIC

# Bayesian information criterion of the fitted model.
BIC(object = model)
## [1] 692.4953

c.Nilai Predicted dan Visualisasi

# First 11 fitted values of the model.
head(x = predict(model), n = 11L)
##        1        2        3        4        5        6        7        8 
## 564582.4 573957.2 583332.0 592706.9 602081.7 611456.5 620831.3 630206.1 
##        9       10       11 
## 639580.9 648955.8 658330.6
# Index plot of the first 10 fitted values.
# NOTE(review): the listing just above prints 11 values while this plot uses
# 10 -- confirm which count is intended.
plot(x = head(predict(model), n = 10))

11.Menampilkan Nilai Residuals

# First 11 residuals of the model (resid() is an alias of residuals()).
head(x = residuals(model), n = 11L)
##           1           2           3           4           5           6 
## -13573.3992 -13549.2177 -13222.0363 -12111.8548 -10583.6734 -10519.4919 
##           7           8           9          10          11 
## -10528.3105  -6929.1290  -3191.9476    353.2339   4111.4153
# Estimated intercept and slope (coef() is an alias of coefficients()).
coefficients(model)
##               (Intercept) datacovidjuli2021$Tanggal 
##             -1.757664e+08              1.085048e-01

12.Menambahkan Data Residuals dan Data Predicted dengan tabel

a.Data Residuals

# Store the model residuals as a new `residuals` column of the data set.
datacovidjuli2021[["residuals"]] <- model[["residuals"]]

b.Data Predicted

# Store the fitted values as a new `predicted` column, then show the
# extended data set.
datacovidjuli2021[["predicted"]] <- model[["fitted.values"]]
print(datacovidjuli2021)
## # A tibble: 31 x 10
##    Tanggal             POSITIF retail_and_recreation_pe~ grocery_and_pharmacy_p~
##    <dttm>                <dbl>                     <dbl>                   <dbl>
##  1 2021-07-01 00:00:00  551009                       -42                      -5
##  2 2021-07-02 00:00:00  560408                       -32                       2
##  3 2021-07-03 00:00:00  570110                       -34                      -1
##  4 2021-07-04 00:00:00  580595                       -34                      -2
##  5 2021-07-05 00:00:00  591498                       -37                      -6
##  6 2021-07-06 00:00:00  600937                       -31                       2
##  7 2021-07-07 00:00:00  610303                       -36                       2
##  8 2021-07-08 00:00:00  623277                       -39                      -5
##  9 2021-07-09 00:00:00  636389                       -32                      -2
## 10 2021-07-10 00:00:00  649309                       -35                      -6
## # ... with 21 more rows, and 6 more variables:
## #   parks_percent_change_from_baseline <dbl>,
## #   transit_stations_percent_change_from_baseline <dbl>,
## #   workplaces_percent_change_from_baseline <dbl>,
## #   residential_percent_change_from_baseline <dbl>, residuals <dbl>,
## #   predicted <dbl>

13.Menambahkan scatter.smooth, boxplot dan plot untuk Visualisasi

# Scatter plot of positive cases against date with a smoothed curve added.
scatter.smooth(
  x = datacovidjuli2021$Tanggal,
  y = datacovidjuli2021$POSITIF,
  main = "Tanggal ~ POSITIF"
)

# Box plot of positive cases, with any outlier values listed in the subtitle.
# The outlier vector used to be passed as an unnamed argument, which boxplot()
# treats as a second data group (an extra box) rather than as an annotation;
# it now goes into `sub`.
outlier_values <- boxplot.stats(datacovidjuli2021$POSITIF)$out
outlier_label <- if (length(outlier_values) > 0) {
  paste(outlier_values, collapse = " ")
} else {
  "none"
}
boxplot(
  datacovidjuli2021$POSITIF,
  main = "POSITIF",
  sub = paste("Outliers:", outlier_label)
)

# Kernel density estimate of positive cases.
# The y axis of a density curve is a density, not a frequency count, so the
# axis label is corrected; the title is left as written.
plot(
  density(datacovidjuli2021$POSITIF),
  main = "Google Mobility Index: POSITIF",
  ylab = "Density"
)

# Scatter plot of positive cases against date with the fitted line and the
# fitted equation written in the top-left corner.
coefs <- coef(model)
plot(POSITIF ~ Tanggal, data = datacovidjuli2021)
abline(coefs) # a two-element vector is read as (intercept, slope)

# The label used to be drawn with text() at (12, 10), far outside the plotted
# range of dates and case counts, so it never appeared. It also named the
# predictor "POSITIF" although the model regresses POSITIF on Tanggal, and
# rounding to two decimals reduced the slope (about 0.1085) to 0.11.
equation_label <- paste(
  "POSITIF =", signif(coefs[1], 4), "+", signif(coefs[2], 4), "* Tanggal"
)
legend("topleft", legend = equation_label, bty = "n")

14.Menguji Korelasi Antar Variabel

a.Menguji korelasi variabel y dengan x1

# Pearson correlation test: retail/recreation (x1) versus positive cases (y).
cor.test(
  x = datacovidjuli2021$retail_and_recreation_percent_change_from_baseline,
  y = datacovidjuli2021$POSITIF
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$retail_and_recreation_percent_change_from_baseline and datacovidjuli2021$POSITIF
## t = 5.6768, df = 29, p-value = 3.878e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4995903 0.8590104
## sample estimates:
##      cor 
## 0.725499

b.Menguji korelasi variabel y dengan x2

# Pearson correlation test: grocery/pharmacy (x2) versus positive cases (y).
cor.test(
  x = datacovidjuli2021$grocery_and_pharmacy_percent_change_from_baseline,
  y = datacovidjuli2021$POSITIF
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$grocery_and_pharmacy_percent_change_from_baseline and datacovidjuli2021$POSITIF
## t = 0.70897, df = 29, p-value = 0.484
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2346671  0.4634325
## sample estimates:
##       cor 
## 0.1305265

c.Menguji korelasi variabel y dengan x3

# Pearson correlation test: parks (x3) versus positive cases (y).
cor.test(
  x = datacovidjuli2021$parks_percent_change_from_baseline,
  y = datacovidjuli2021$POSITIF
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$parks_percent_change_from_baseline and datacovidjuli2021$POSITIF
## t = 3.2308, df = 29, p-value = 0.003067
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1958155 0.7348426
## sample estimates:
##       cor 
## 0.5144597

d.Menguji korelasi variabel y dengan x4

# Pearson correlation test: transit stations (x4) versus positive cases (y).
cor.test(
  x = datacovidjuli2021$transit_stations_percent_change_from_baseline,
  y = datacovidjuli2021$POSITIF
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$transit_stations_percent_change_from_baseline and datacovidjuli2021$POSITIF
## t = 3.5888, df = 29, p-value = 0.001206
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2491836 0.7596297
## sample estimates:
##       cor 
## 0.5545586

e.Menguji korelasi variabel y dengan x5

# Pearson correlation test: workplaces (x5) versus positive cases (y).
cor.test(
  x = datacovidjuli2021$workplaces_percent_change_from_baseline,
  y = datacovidjuli2021$POSITIF
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$workplaces_percent_change_from_baseline and datacovidjuli2021$POSITIF
## t = 1.0596, df = 29, p-value = 0.2981
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1731184  0.5123545
## sample estimates:
##       cor 
## 0.1930649

f.Menguji korelasi variabel y dengan x6

# Pearson correlation test: residential (x6) versus positive cases (y).
cor.test(
  x = datacovidjuli2021$residential_percent_change_from_baseline,
  y = datacovidjuli2021$POSITIF
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$residential_percent_change_from_baseline and datacovidjuli2021$POSITIF
## t = -2.1471, df = 29, p-value = 0.04028
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.64061989 -0.01842556
## sample estimates:
##        cor 
## -0.3703477

Daftar Pustaka

1.https://www.google.com/search?q=regresi+Linier+Berganda&oq=regresi+Linier+Berganda&aqs=chrome..69i57j0i512l6j69i60.5025j0j7&sourceid=chrome&ie=UTF-8

2.https://rpubs.com/suhartono-uinmaliki/877449