Lembaga : Universitas Islam Negeri Maulana Malik Ibrahim Malang
Jurusan : Teknik Informatika

1. Pengertian Regresi Linear Berganda

Regresi Linear Berganda adalah model regresi linear yang melibatkan lebih dari satu variabel bebas atau prediktor. Dalam bahasa Inggris, istilah ini disebut multiple linear regression.

2. Menampilkan Data Set Sembuh Covid-19

library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
# Load the July 2021 DKI Jakarta COVID-19 recovery workbook into a tibble.
# NOTE(review): absolute Windows path — this only runs on a machine that has
# the file at exactly this location.
datacovidjuli2021 <- read_excel(path = "C:/Users/User/Documents/SEMESTER 2/LINEAR ALGEBRA/data sembuh covid-19 DKI Jakarta Juli 2021.xlsx")
# Print the tibble (first 10 rows plus the column overview).
datacovidjuli2021
## # A tibble: 31 x 8
##    Tanggal             Sembuh retail_and_recr~ grocery_and_pha~ parks_percent_c~
##    <dttm>               <dbl>            <dbl>            <dbl>            <dbl>
##  1 2021-07-01 00:00:00 468461              -42               -5              -67
##  2 2021-07-02 00:00:00 473467              -32                2              -51
##  3 2021-07-03 00:00:00 479150              -34               -1              -53
##  4 2021-07-04 00:00:00 484949              -34               -2              -52
##  5 2021-07-05 00:00:00 491556              -37               -6              -55
##  6 2021-07-06 00:00:00 497492              -31                2              -45
##  7 2021-07-07 00:00:00 501199              -36                2              -56
##  8 2021-07-08 00:00:00 512085              -39               -5              -63
##  9 2021-07-09 00:00:00 526941              -32               -2              -50
## 10 2021-07-10 00:00:00 543867              -35               -6              -55
## # ... with 21 more rows, and 3 more variables:
## #   transit_stations_percent_change_from_baseline <dbl>,
## #   workplaces_percent_change_from_baseline <dbl>,
## #   residential_percent_change_from_baseline <dbl>

3. Menampilkan Nilai Minimum, Kuartil Pertama (Q1), Median (Q2), Mean, Kuartil Ketiga (Q3), dan Maksimum dengan Fungsi summary()

# Per-column summary: min, 1st quartile, median, mean, 3rd quartile, max.
summary(datacovidjuli2021)
##     Tanggal                        Sembuh      
##  Min.   :2021-07-01 00:00:00   Min.   :468461  
##  1st Qu.:2021-07-08 12:00:00   1st Qu.:519513  
##  Median :2021-07-16 00:00:00   Median :604060  
##  Mean   :2021-07-16 00:00:00   Mean   :614736  
##  3rd Qu.:2021-07-23 12:00:00   3rd Qu.:695171  
##  Max.   :2021-07-31 00:00:00   Max.   :784668  
##  retail_and_recreation_percent_change_from_baseline
##  Min.   :-42.00                                    
##  1st Qu.:-34.00                                    
##  Median :-31.00                                    
##  Mean   :-31.16                                    
##  3rd Qu.:-28.00                                    
##  Max.   :-22.00                                    
##  grocery_and_pharmacy_percent_change_from_baseline
##  Min.   :-13.000                                  
##  1st Qu.: -5.500                                  
##  Median : -2.000                                  
##  Mean   : -2.161                                  
##  3rd Qu.:  0.000                                  
##  Max.   :  6.000                                  
##  parks_percent_change_from_baseline
##  Min.   :-67.0                     
##  1st Qu.:-54.0                     
##  Median :-50.0                     
##  Mean   :-50.9                     
##  3rd Qu.:-47.0                     
##  Max.   :-41.0                     
##  transit_stations_percent_change_from_baseline
##  Min.   :-61.00                               
##  1st Qu.:-51.00                               
##  Median :-47.00                               
##  Mean   :-47.84                               
##  3rd Qu.:-45.00                               
##  Max.   :-39.00                               
##  workplaces_percent_change_from_baseline
##  Min.   :-73.00                         
##  1st Qu.:-39.50                         
##  Median :-35.00                         
##  Mean   :-34.03                         
##  3rd Qu.:-23.50                         
##  Max.   :-14.00                         
##  residential_percent_change_from_baseline
##  Min.   : 7.00                           
##  1st Qu.:11.50                           
##  Median :14.00                           
##  Mean   :13.61                           
##  3rd Qu.:16.00                           
##  Max.   :22.00

4. Membuat Matriks Scatterplot dengan Fungsi pairs()

# Scatterplot matrix across every pair of columns.
pairs(x = datacovidjuli2021)

# Same matrix with the lower-triangle panels suppressed.
pairs(x = datacovidjuli2021, lower.panel = NULL)

5. Visualisasi Data dengan Fungsi plot()

# Recovered cases over time. Bare column names are resolved through `data`,
# which also yields clean axis labels ("Tanggal", "Sembuh") instead of the
# full `datacovidjuli2021$...` expressions.
plot(Sembuh ~ Tanggal, data = datacovidjuli2021)

Visualisasi data dengan data sembuh Covid-19 sebagai variabel Y dan Google Mobility Index sebagai variabel X

# Recovered cases (y) against each mobility indicator (x). With several terms
# on the right-hand side, plot() draws one scatterplot per term. Bare column
# names are resolved through `data` and give readable axis labels.
plot(
  Sembuh ~ retail_and_recreation_percent_change_from_baseline +
    grocery_and_pharmacy_percent_change_from_baseline +
    parks_percent_change_from_baseline +
    transit_stations_percent_change_from_baseline +
    workplaces_percent_change_from_baseline +
    residential_percent_change_from_baseline,
  data = datacovidjuli2021
)

6. Membangun Korelasi antar Variabel

Korelasi adalah nilai yang menunjukkan kekuatan dan arah hubungan linear antara dua peubah acak.

a. Korelasi Variabel y dengan x1

# Correlation between recovered cases (y) and retail/recreation mobility (x1).
with(
  datacovidjuli2021,
  cor(Sembuh, retail_and_recreation_percent_change_from_baseline)
)
## [1] 0.7820109

b. Korelasi Variabel y dengan x2

# Correlation between recovered cases (y) and grocery/pharmacy mobility (x2).
with(
  datacovidjuli2021,
  cor(Sembuh, grocery_and_pharmacy_percent_change_from_baseline)
)
## [1] 0.2624391

c. Korelasi Variabel y dengan x3

# Correlation between recovered cases (y) and parks mobility (x3).
with(
  datacovidjuli2021,
  cor(Sembuh, parks_percent_change_from_baseline)
)
## [1] 0.5192907

d. Korelasi Variabel y dengan x4

# Correlation between recovered cases (y) and transit-station mobility (x4).
with(
  datacovidjuli2021,
  cor(Sembuh, transit_stations_percent_change_from_baseline)
)
## [1] 0.6064333

e. Korelasi variabel y dengan x5

# Correlation between recovered cases (y) and workplace mobility (x5).
with(
  datacovidjuli2021,
  cor(Sembuh, workplaces_percent_change_from_baseline)
)
## [1] 0.2220271

f. Korelasi variabel y dengan x6

# Correlation between recovered cases (y) and residential mobility (x6).
with(
  datacovidjuli2021,
  cor(Sembuh, residential_percent_change_from_baseline)
)
## [1] -0.4106419

7. Menggunakan Pemodelan dari Data Sembuh Covid-19

# Fit a least-squares regression of recovered cases on the date.
# NOTE(review): only Tanggal enters the formula, so this is a simple
# (single-predictor) regression even though the document is about multiple
# linear regression; none of the mobility columns are used. The
# `datacovidjuli2021$` prefixes also make `data =` redundant.
model <- lm(datacovidjuli2021$Sembuh ~ datacovidjuli2021$Tanggal, data = datacovidjuli2021)
# Residual summary, coefficient table, R-squared and overall F-test.
summary(model)
## 
## Call:
## lm(formula = datacovidjuli2021$Sembuh ~ datacovidjuli2021$Tanggal, 
##     data = datacovidjuli2021)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -13716  -7964   -894   8517  20479 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -2.086e+08  3.553e+06  -58.73   <2e-16 ***
## datacovidjuli2021$Tanggal  1.287e-01  2.184e-03   58.90   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9399 on 29 degrees of freedom
## Multiple R-squared:  0.9917, Adjusted R-squared:  0.9914 
## F-statistic:  3469 on 1 and 29 DF,  p-value: < 2.2e-16

8. Menggunakan Fungsi anova()

# Analysis-of-variance table for the fitted model.
anova(model)
## Analysis of Variance Table
## 
## Response: datacovidjuli2021$Sembuh
##                           Df     Sum Sq    Mean Sq F value    Pr(>F)    
## datacovidjuli2021$Tanggal  1 3.0649e+11 3.0649e+11  3469.3 < 2.2e-16 ***
## Residuals                 29 2.5620e+09 8.8345e+07                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

9. Membuat plot() Model dari Data Real dan Prediksi

# Observed recovered cases over time with the fitted regression line.
# Bare column names are resolved through `data`, which gives clean axis labels
# instead of the full `datacovidjuli2021$...` expressions.
plot(
  Sembuh ~ Tanggal,
  data = datacovidjuli2021, col = "navy", pch = 20, cex = 1.5,
  main = "Data Sembuh Covid-19 di DKI Jakarta dan Google Mobility Index"
)
abline(model) # Add a regression line

# Cook's distance per observation, to spot influential points.
plot(cooks.distance(model), pch = 16, col = "navy") # Plot the Cooks Distances.

# Standard diagnostic plots for the fitted lm object.
plot(model)

10. Penggunaan AIC dan BIC

a. AIC

# Akaike information criterion of the fitted model.
AIC(model)
## [1] 659.1062

b. BIC

# Bayesian information criterion of the fitted model.
BIC(model)
## [1] 663.4081

11. Memunculkan Nilai Prediksi dan Visualisasi

# Fitted values for the first 11 observations.
head(predict(model), n = 11)
##        1        2        3        4        5        6        7        8 
## 447982.5 459099.4 470216.2 481333.1 492450.0 503566.8 514683.7 525800.6 
##        9       10       11 
## 536917.4 548034.3 559151.2
# Plot the first 10 fitted values against their index.
# NOTE(review): 11 values are printed just before this but only 10 are
# plotted — confirm which count is intended.
plot(head(predict(model), n = 10))

12. Menampilkan Nilai Residuals dan Koefisien Model

# Residuals of the fitted model for the first 11 observations.
head(resid(model), n = 11)
##           1           2           3           4           5           6 
##  20478.5081  14367.6419   8933.7758   3615.9097   -893.9565  -6074.8226 
##           7           8           9          10          11 
## -13484.6887 -13715.5548  -9976.4210  -4167.2871   5285.8468
# Estimated intercept and slope of the fitted model.
coef(model)
##               (Intercept) datacovidjuli2021$Tanggal 
##             -2.086492e+08              1.286674e-01

13. Menambahkan Data Residuals dan Data Predicted ke dalam Tabel

a. Data Residuals

# Store each observation's residual as a new column of the table.
datacovidjuli2021[["residuals"]] <- model[["residuals"]]

b. Data Predicted

# Store each observation's fitted value as a new column, then show the
# widened table.
datacovidjuli2021[["predicted"]] <- model[["fitted.values"]]
datacovidjuli2021
## # A tibble: 31 x 10
##    Tanggal             Sembuh retail_and_recr~ grocery_and_pha~ parks_percent_c~
##    <dttm>               <dbl>            <dbl>            <dbl>            <dbl>
##  1 2021-07-01 00:00:00 468461              -42               -5              -67
##  2 2021-07-02 00:00:00 473467              -32                2              -51
##  3 2021-07-03 00:00:00 479150              -34               -1              -53
##  4 2021-07-04 00:00:00 484949              -34               -2              -52
##  5 2021-07-05 00:00:00 491556              -37               -6              -55
##  6 2021-07-06 00:00:00 497492              -31                2              -45
##  7 2021-07-07 00:00:00 501199              -36                2              -56
##  8 2021-07-08 00:00:00 512085              -39               -5              -63
##  9 2021-07-09 00:00:00 526941              -32               -2              -50
## 10 2021-07-10 00:00:00 543867              -35               -6              -55
## # ... with 21 more rows, and 5 more variables:
## #   transit_stations_percent_change_from_baseline <dbl>,
## #   workplaces_percent_change_from_baseline <dbl>,
## #   residential_percent_change_from_baseline <dbl>, residuals <dbl>,
## #   predicted <dbl>

14. Menambahkan Fungsi scatter.smooth(), boxplot(), dan plot() untuk Visualisasi

# Scatterplot of recovered cases (y) against date (x) with a smoothed curve.
# The title follows formula order (response ~ predictor) so it matches what
# is actually on each axis.
scatter.smooth(
  x = datacovidjuli2021$Tanggal, y = datacovidjuli2021$Sembuh,
  main = "Sembuh ~ Tanggal"
)

# Boxplot of recovered cases with any outlying values listed in the subtitle.
# The outliers must be passed through a named argument (`sub`): an unnamed
# extra vector is interpreted by boxplot() as a second data group to draw.
sembuh_outliers <- boxplot.stats(datacovidjuli2021$Sembuh)$out
outlier_label <- if (length(sembuh_outliers) > 0) {
  paste(sembuh_outliers, collapse = ", ")
} else {
  "none"
}
boxplot(
  datacovidjuli2021$Sembuh,
  main = "Sembuh",
  sub = paste("Outliers:", outlier_label)
)

# Kernel density estimate of recovered cases. The y-axis of a density plot is
# an estimated density, not a frequency count, so it is labelled accordingly.
plot(
  density(datacovidjuli2021$Sembuh),
  main = "Google Mobility Index: Sembuh",
  ylab = "Density"
)

# Redraw the data with the fitted line and annotate it with the equation.
coefs <- coef(model)
plot(Sembuh ~ Tanggal, data = datacovidjuli2021)
abline(coefs)
# Anchor the label to a plot corner. The axes span date-times and recovered
# case counts in the hundreds of thousands, so a fixed point such as
# (x = 12, y = 10) lies far outside the plotting region and is never drawn.
# The predictor in the equation is Tanggal, not Sembuh (the response).
legend(
  "topleft",
  legend = paste('expression = ', round(coefs[1], 2), '+',
                 round(coefs[2], 2), '*Tanggal'),
  bty = "n"
)

15. Menguji Korelasi antar Variabel

a. Menguji Korelasi variabel y dengan x1

# Test whether the correlation between retail/recreation mobility (x1) and
# recovered cases (y) differs from zero.
cor.test(
  datacovidjuli2021$retail_and_recreation_percent_change_from_baseline,
  datacovidjuli2021$Sembuh
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$retail_and_recreation_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = 6.7568, df = 29, p-value = 2.045e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.5916026 0.8897917
## sample estimates:
##       cor 
## 0.7820109

b. Menguji Korelasi variabel y dengan x2

# Test whether the correlation between grocery/pharmacy mobility (x2) and
# recovered cases (y) differs from zero.
cor.test(
  datacovidjuli2021$grocery_and_pharmacy_percent_change_from_baseline,
  datacovidjuli2021$Sembuh
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$grocery_and_pharmacy_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = 1.4646, df = 29, p-value = 0.1538
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1013233  0.5643032
## sample estimates:
##       cor 
## 0.2624391

c. Menguji Korelasi variabel y dengan x3

# Test whether the correlation between parks mobility (x3) and recovered
# cases (y) differs from zero.
cor.test(
  datacovidjuli2021$parks_percent_change_from_baseline,
  datacovidjuli2021$Sembuh
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$parks_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = 3.2723, df = 29, p-value = 0.002757
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2021467 0.7378604
## sample estimates:
##       cor 
## 0.5192907

d. Menguji Korelasi variabel y dengan x4

# Test whether the correlation between transit-station mobility (x4) and
# recovered cases (y) differs from zero.
cor.test(
  datacovidjuli2021$transit_stations_percent_change_from_baseline,
  datacovidjuli2021$Sembuh
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$transit_stations_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = 4.1072, df = 29, p-value = 0.0002988
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3210902 0.7908358
## sample estimates:
##       cor 
## 0.6064333

e. Menguji Korelasi variabel y dengan x5

# Test whether the correlation between workplace mobility (x5) and recovered
# cases (y) differs from zero.
cor.test(
  datacovidjuli2021$workplaces_percent_change_from_baseline,
  datacovidjuli2021$Sembuh
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$workplaces_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = 1.2263, df = 29, p-value = 0.23
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1436114  0.5343298
## sample estimates:
##       cor 
## 0.2220271

f. Menguji Korelasi variabel y dengan x6

# Test whether the correlation between residential mobility (x6) and
# recovered cases (y) differs from zero.
cor.test(
  datacovidjuli2021$residential_percent_change_from_baseline,
  datacovidjuli2021$Sembuh
)
## 
##  Pearson's product-moment correlation
## 
## data:  datacovidjuli2021$residential_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = -2.4253, df = 29, p-value = 0.02175
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.66781095 -0.06588915
## sample estimates:
##        cor 
## -0.4106419

Sumber:

https://rpubs.com/suhartono-uinmaliki/877449

https://www.google.com/search?q=regresi+Linier+Berganda&oq=regresi+Linier+Berganda&aqs=chrome..69i57j0i512l6j69i60.5025j0j7&sourceid=chrome&ie=UTF-8

# NOTE(review): `cars` is not the COVID-19 table analysed above; this chunk
# looks like a leftover from the default R Markdown template — confirm whether
# it belongs in this report.
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00