Pengertian Regresi Linear

Regresi linear adalah salah satu model paling sederhana dan paling sering dibahas dalam statistik. Model ini dibangun dengan pendekatan linear, yaitu dengan prinsip meminimalkan jumlah kuadrat sisaan (residual) pada data. Secara umum, regresi merupakan suatu metode untuk memodelkan hubungan (pengaruh) antara satu variabel dengan variabel lainnya.

Data Riwayat Covid-19 dan Google Mobility Index di Jakarta pada Bulan Juli 2020

library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
# Read the mobility workbook into `Data` and print it.
# NOTE(review): the printed tibble reports 1,048,551 rows, while the regression
# further down keeps only a few dozen complete observations -- the sheet
# presumably contains a long run of empty rows; confirm and trim at the source.
Data <- read_excel("DataMobility1-7.xlsx")
print(Data)
## # A tibble: 1,048,551 x 13
##    Tanggal             Kota    Positif Dirawat Sembuh Meninggal `Self Isolation`
##    <dttm>              <chr>     <dbl>   <dbl>  <dbl>     <dbl>            <dbl>
##  1 2020-07-01 00:00:00 Jakarta  551009   24184 468461      8528            49836
##  2 2020-07-02 00:00:00 Jakarta  560408   25380 473467      8547            53014
##  3 2020-07-03 00:00:00 Jakarta  570110   27442 479150      8577            54941
##  4 2020-07-04 00:00:00 Jakarta  580595   27687 484949      8652            59307
##  5 2020-07-05 00:00:00 Jakarta  591498   28290 491556      8779            62873
##  6 2020-07-06 00:00:00 Jakarta  600937   29136 497492      8861            65448
##  7 2020-07-07 00:00:00 Jakarta  610303   30418 501199      9042            69644
##  8 2020-07-08 00:00:00 JAKARTA   13069     417   8429       667             3556
##  9 2020-07-09 00:00:00 JAKARTA   13359     451   8647       677             3584
## 10 2020-07-10 00:00:00 JAKARTA   13598     476   8825       684             3613
## # ... with 1,048,541 more rows, and 6 more variables:
## #   retail_and_recreation_percent_change_from_baseline <dbl>,
## #   grocery_and_pharmacy_percent_change_from_baseline <dbl>,
## #   parks_percent_change_from_baseline <dbl>,
## #   transit_stations_percent_change_from_baseline <dbl>,
## #   workplaces_percent_change_from_baseline <dbl>,
## #   residential_percent_change_from_baseline <dbl>
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.1.3
# Plot every Google mobility indicator against the self-isolation count.
# The individual column vectors stay available as top-level objects.
x <- Data$`Self Isolation`
retail <- Data$retail_and_recreation_percent_change_from_baseline
grocery <- Data$grocery_and_pharmacy_percent_change_from_baseline
park <- Data$parks_percent_change_from_baseline
station <- Data$transit_stations_percent_change_from_baseline
workplace <- Data$workplaces_percent_change_from_baseline
residental <- Data$residential_percent_change_from_baseline

# Name the last column explicitly so the legend reads "residential" instead of
# inheriting the misspelled object name `residental`.
df <- data.frame(
  x, retail, grocery, park, station, workplace,
  residential = residental
)

# Drop rows without a self-isolation value before reshaping. `Data` has over a
# million rows and nearly all of them are empty; ggplot2 would only discard
# them again with a "Removed ... rows containing missing values" warning.
df <- df[!is.na(df$x), ]

# Reshape to long format: one row per (x, indicator) pair. `na.rm = TRUE`
# removes pairs whose indicator value is missing.
df2 <- melt(data = df, id.vars = "x", na.rm = TRUE)

# One colour per indicator; points joined by lines in order of x.
ggplot(data = df2, aes(x = x, y = value, colour = variable)) +
  geom_point() +
  geom_line() +
  labs(
    title = "Google Mobility Index",
    subtitle = "Provinsi DKI Jakarta Indonesia Bulan Juli 2020",
    y = "Mobility", x = "Data Self Isolation"
  ) +
  theme(
    legend.justification = "top",
    axis.text.x = element_text(angle = -90)
  )

Regresi Linear Berganda

# Fit a multiple linear regression of the self-isolation count on the six
# Google mobility indicators, then print the fitted model.
# NOTE(review): the terms reference `Data$...` directly rather than using the
# `data =` argument of lm(); left unchanged here so the printed call and the
# coefficient names stay exactly as they are.
model <- lm(
  Data$`Self Isolation` ~
    Data$retail_and_recreation_percent_change_from_baseline +
    Data$grocery_and_pharmacy_percent_change_from_baseline +
    Data$parks_percent_change_from_baseline +
    Data$transit_stations_percent_change_from_baseline +
    Data$workplaces_percent_change_from_baseline +
    Data$residential_percent_change_from_baseline
)
print(model)
## 
## Call:
## lm(formula = Data$`Self Isolation` ~ Data$retail_and_recreation_percent_change_from_baseline + 
##     Data$grocery_and_pharmacy_percent_change_from_baseline + 
##     Data$parks_percent_change_from_baseline + Data$transit_stations_percent_change_from_baseline + 
##     Data$workplaces_percent_change_from_baseline + Data$residential_percent_change_from_baseline)
## 
## Coefficients:
##                                             (Intercept)  
##                                               -398864.9  
## Data$retail_and_recreation_percent_change_from_baseline  
##                                                 -6569.0  
##  Data$grocery_and_pharmacy_percent_change_from_baseline  
##                                                  8685.8  
##                 Data$parks_percent_change_from_baseline  
##                                                  -966.0  
##      Data$transit_stations_percent_change_from_baseline  
##                                                 -5482.2  
##            Data$workplaces_percent_change_from_baseline  
##                                                  -159.1  
##           Data$residential_percent_change_from_baseline  
##                                                    73.5

Interpretasi Hasil Regresi Berganda

Untuk menampilkan hasil regresi secara lengkap, kita dapat menggunakan fungsi summary().

# Print the full regression summary (residuals, coefficient table, R-squared
# and F-statistic) for the fitted model.
print(summary(model))
## 
## Call:
## lm(formula = Data$`Self Isolation` ~ Data$retail_and_recreation_percent_change_from_baseline + 
##     Data$grocery_and_pharmacy_percent_change_from_baseline + 
##     Data$parks_percent_change_from_baseline + Data$transit_stations_percent_change_from_baseline + 
##     Data$workplaces_percent_change_from_baseline + Data$residential_percent_change_from_baseline)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -20460 -12538  -2795   6573  39877 
## 
## Coefficients:
##                                                          Estimate Std. Error
## (Intercept)                                             -398864.9   129882.9
## Data$retail_and_recreation_percent_change_from_baseline   -6569.0     2298.9
## Data$grocery_and_pharmacy_percent_change_from_baseline     8685.8     2042.8
## Data$parks_percent_change_from_baseline                    -966.0      698.9
## Data$transit_stations_percent_change_from_baseline        -5482.2     1653.5
## Data$workplaces_percent_change_from_baseline               -159.1      650.7
## Data$residential_percent_change_from_baseline                73.5      390.2
##                                                         t value Pr(>|t|)    
## (Intercept)                                              -3.071 0.005240 ** 
## Data$retail_and_recreation_percent_change_from_baseline  -2.857 0.008684 ** 
## Data$grocery_and_pharmacy_percent_change_from_baseline    4.252 0.000278 ***
## Data$parks_percent_change_from_baseline                  -1.382 0.179603    
## Data$transit_stations_percent_change_from_baseline       -3.315 0.002900 ** 
## Data$workplaces_percent_change_from_baseline             -0.245 0.808890    
## Data$residential_percent_change_from_baseline             0.188 0.852157    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18040 on 24 degrees of freedom
##   (1048520 observations deleted due to missingness)
## Multiple R-squared:  0.5316, Adjusted R-squared:  0.4145 
## F-statistic: 4.539 on 6 and 24 DF,  p-value: 0.003283

Plot Hasil Regresi Berganda

1. Plot Pengaruh Variabel Retail and Recreation Percent Change From Baseline terhadap Variabel Self Isolation

# Scatter plot of the self-isolation count against the retail and recreation
# mobility index. Explicit labels replace the default axis titles, which would
# otherwise be the raw `Data$...` expressions.
plot(
  Data$retail_and_recreation_percent_change_from_baseline,
  Data$`Self Isolation`,
  col = "dodgerblue",
  xlab = "Retail and Recreation (% change from baseline)",
  ylab = "Self Isolation"
)

2. Plot Pengaruh Variabel Grocery and Pharmacy Percent Change From Baseline terhadap Variabel Self Isolation

# Scatter plot of the self-isolation count against the grocery and pharmacy
# mobility index. Explicit labels replace the default axis titles, which would
# otherwise be the raw `Data$...` expressions.
plot(
  Data$grocery_and_pharmacy_percent_change_from_baseline,
  Data$`Self Isolation`,
  col = "red",
  xlab = "Grocery and Pharmacy (% change from baseline)",
  ylab = "Self Isolation"
)

3. Plot Pengaruh Variabel Parks Percent Change From Baseline terhadap Variabel Self Isolation

# Scatter plot of the self-isolation count against the parks mobility index.
# Explicit labels replace the default axis titles, which would otherwise be
# the raw `Data$...` expressions.
plot(
  Data$parks_percent_change_from_baseline,
  Data$`Self Isolation`,
  col = "darkorange",
  xlab = "Parks (% change from baseline)",
  ylab = "Self Isolation"
)

4. Plot Pengaruh Variabel Transit Stations Percent Change From Baseline terhadap Variabel Self Isolation

# Scatter plot of the self-isolation count against the transit stations
# mobility index. Explicit labels replace the default axis titles, which would
# otherwise be the raw `Data$...` expressions.
plot(
  Data$transit_stations_percent_change_from_baseline,
  Data$`Self Isolation`,
  col = "darkgreen",
  xlab = "Transit Stations (% change from baseline)",
  ylab = "Self Isolation"
)

5. Plot Pengaruh Variabel Workplaces Percent Change From Baseline terhadap Variabel Self Isolation

# Scatter plot of the self-isolation count against the workplaces mobility
# index. Explicit labels replace the default axis titles, which would
# otherwise be the raw `Data$...` expressions.
plot(
  Data$workplaces_percent_change_from_baseline,
  Data$`Self Isolation`,
  col = "blueviolet",
  xlab = "Workplaces (% change from baseline)",
  ylab = "Self Isolation"
)

6. Plot Pengaruh Variabel Residential Percent Change From Baseline terhadap Variabel Self Isolation

# Scatter plot of the self-isolation count against the residential mobility
# index. Explicit labels replace the default axis titles, which would
# otherwise be the raw `Data$...` expressions.
plot(
  Data$residential_percent_change_from_baseline,
  Data$`Self Isolation`,
  col = "darkcyan",
  xlab = "Residential (% change from baseline)",
  ylab = "Self Isolation"
)

7. Plot Diagnostik Hasil Regresi Linear Berganda

# Draw the diagnostic plots for the fitted model, spelling out the panel
# selection that plot.lm() documents as its default (panels 1, 2, 3 and 5).
# NOTE(review): the rendered output shows "NaNs produced" warnings from
# sqrt(crit * p * (1 - hh)/hh) during this call -- presumably from the
# leverage panel; confirm which observations trigger it.
plot(model, which = c(1, 2, 3, 5))

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced