Regresi linear berganda adalah model regresi linear yang melibatkan lebih dari satu variabel bebas (prediktor). Dalam bahasa Inggris, istilah ini disebut multiple linear regression.
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
# Load the "data sembuh covid-19 DKI Jakarta Juli 2021" workbook.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
datacovidjuli2021 <- read_excel(
  "C:/Users/User/Documents/SEMESTER 2/LINEAR ALGEBRA/data sembuh covid-19 DKI Jakarta Juli 2021.xlsx"
)
# Show the imported table.
print(datacovidjuli2021)
## # A tibble: 31 x 8
## Tanggal Sembuh retail_and_recr~ grocery_and_pha~ parks_percent_c~
## <dttm> <dbl> <dbl> <dbl> <dbl>
## 1 2021-07-01 00:00:00 468461 -42 -5 -67
## 2 2021-07-02 00:00:00 473467 -32 2 -51
## 3 2021-07-03 00:00:00 479150 -34 -1 -53
## 4 2021-07-04 00:00:00 484949 -34 -2 -52
## 5 2021-07-05 00:00:00 491556 -37 -6 -55
## 6 2021-07-06 00:00:00 497492 -31 2 -45
## 7 2021-07-07 00:00:00 501199 -36 2 -56
## 8 2021-07-08 00:00:00 512085 -39 -5 -63
## 9 2021-07-09 00:00:00 526941 -32 -2 -50
## 10 2021-07-10 00:00:00 543867 -35 -6 -55
## # ... with 21 more rows, and 3 more variables:
## # transit_stations_percent_change_from_baseline <dbl>,
## # workplaces_percent_change_from_baseline <dbl>,
## # residential_percent_change_from_baseline <dbl>
# Per-column summary statistics (min, quartiles, median, mean, max).
summary(datacovidjuli2021)
## Tanggal Sembuh
## Min. :2021-07-01 00:00:00 Min. :468461
## 1st Qu.:2021-07-08 12:00:00 1st Qu.:519513
## Median :2021-07-16 00:00:00 Median :604060
## Mean :2021-07-16 00:00:00 Mean :614736
## 3rd Qu.:2021-07-23 12:00:00 3rd Qu.:695171
## Max. :2021-07-31 00:00:00 Max. :784668
## retail_and_recreation_percent_change_from_baseline
## Min. :-42.00
## 1st Qu.:-34.00
## Median :-31.00
## Mean :-31.16
## 3rd Qu.:-28.00
## Max. :-22.00
## grocery_and_pharmacy_percent_change_from_baseline
## Min. :-13.000
## 1st Qu.: -5.500
## Median : -2.000
## Mean : -2.161
## 3rd Qu.: 0.000
## Max. : 6.000
## parks_percent_change_from_baseline
## Min. :-67.0
## 1st Qu.:-54.0
## Median :-50.0
## Mean :-50.9
## 3rd Qu.:-47.0
## Max. :-41.0
## transit_stations_percent_change_from_baseline
## Min. :-61.00
## 1st Qu.:-51.00
## Median :-47.00
## Mean :-47.84
## 3rd Qu.:-45.00
## Max. :-39.00
## workplaces_percent_change_from_baseline
## Min. :-73.00
## 1st Qu.:-39.50
## Median :-35.00
## Mean :-34.03
## 3rd Qu.:-23.50
## Max. :-14.00
## residential_percent_change_from_baseline
## Min. : 7.00
## 1st Qu.:11.50
## Median :14.00
## Mean :13.61
## 3rd Qu.:16.00
## Max. :22.00
# Scatterplot matrix of every column against every other column.
pairs(datacovidjuli2021)
# Same matrix with the lower panels suppressed.
pairs(datacovidjuli2021, lower.panel = NULL)
# Recoveries (Sembuh) over time (Tanggal). Columns are named bare and resolved
# through `data`, so the axes are labelled "Tanggal" / "Sembuh" instead of the
# full `datacovidjuli2021$...` expressions.
plot(Sembuh ~ Tanggal, data = datacovidjuli2021)
Visualisasi data dengan data sembuh Covid-19 sebagai variabel Y dan Google Mobility Index sebagai variabel X.
# Scatterplots of recoveries (Sembuh) against each mobility category; the
# formula method of plot() draws a separate plot for every right-hand-side
# term. Columns are named bare and looked up in `data`, which keeps the axis
# labels short and avoids repeating the data frame name in every term.
plot(
  Sembuh ~
    retail_and_recreation_percent_change_from_baseline +
    grocery_and_pharmacy_percent_change_from_baseline +
    parks_percent_change_from_baseline +
    transit_stations_percent_change_from_baseline +
    workplaces_percent_change_from_baseline +
    residential_percent_change_from_baseline,
  data = datacovidjuli2021
)
Korelasi adalah nilai yang menunjukkan kekuatan dan arah hubungan linear antara dua peubah acak.
# Correlation of recoveries (Sembuh) with each mobility category, one call per
# category. `with()` resolves the column names inside the data frame.
with(datacovidjuli2021,
     cor(Sembuh, retail_and_recreation_percent_change_from_baseline))
## [1] 0.7820109
with(datacovidjuli2021,
     cor(Sembuh, grocery_and_pharmacy_percent_change_from_baseline))
## [1] 0.2624391
with(datacovidjuli2021,
     cor(Sembuh, parks_percent_change_from_baseline))
## [1] 0.5192907
with(datacovidjuli2021,
     cor(Sembuh, transit_stations_percent_change_from_baseline))
## [1] 0.6064333
with(datacovidjuli2021,
     cor(Sembuh, workplaces_percent_change_from_baseline))
## [1] 0.2220271
with(datacovidjuli2021,
     cor(Sembuh, residential_percent_change_from_baseline))
## [1] -0.4106419
# Linear regression of recoveries (Sembuh) on date (Tanggal).
# Columns are referenced by bare name and resolved through `data`. Writing
# `datacovidjuli2021$...` inside the formula bypasses `data`, makes
# predict(model, newdata = ...) ignore the new data, and clutters the term
# labels. The fitted values are unchanged; the slope term is now labelled
# `Tanggal`.
model <- lm(Sembuh ~ Tanggal, data = datacovidjuli2021)
# Coefficient table, residual summary, R-squared and F-test of the fit.
summary(model)
##
## Call:
## lm(formula = datacovidjuli2021$Sembuh ~ datacovidjuli2021$Tanggal,
## data = datacovidjuli2021)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13716 -7964 -894 8517 20479
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.086e+08 3.553e+06 -58.73 <2e-16 ***
## datacovidjuli2021$Tanggal 1.287e-01 2.184e-03 58.90 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9399 on 29 degrees of freedom
## Multiple R-squared: 0.9917, Adjusted R-squared: 0.9914
## F-statistic: 3469 on 1 and 29 DF, p-value: < 2.2e-16
# Analysis-of-variance table for the fitted model.
anova(model)
## Analysis of Variance Table
##
## Response: datacovidjuli2021$Sembuh
## Df Sum Sq Mean Sq F value Pr(>F)
## datacovidjuli2021$Tanggal 1 3.0649e+11 3.0649e+11 3469.3 < 2.2e-16 ***
## Residuals 29 2.5620e+09 8.8345e+07
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Observed recoveries by date with the fitted regression line on top. Columns
# are named bare and looked up in `data`, so the axes read "Tanggal" and
# "Sembuh" rather than the full `datacovidjuli2021$...` expressions.
plot(Sembuh ~ Tanggal,
     data = datacovidjuli2021, col = "navy", pch = 20, cex = 1.5,
     main = "Data Sembuh Covid-19 di DKI Jakarta dan Google Mobility Index")
abline(model) # Add the regression line
# Cook's distance per observation.
plot(cooks.distance(model), pch = 16, col = "navy")
# Diagnostic plots produced by the plot method for the fitted model.
plot(model)
# Information criteria of the fitted model.
AIC(model)
## [1] 659.1062
BIC(model)
## [1] 663.4081
# Fitted values for the first 11 observations.
head(predict(model), n = 11)
## 1 2 3 4 5 6 7 8
## 447982.5 459099.4 470216.2 481333.1 492450.0 503566.8 514683.7 525800.6
## 9 10 11
## 536917.4 548034.3 559151.2
# Plot of the first 10 fitted values (10 here, whereas 11 are printed above).
plot(head(predict(model), n = 10))
# Residuals for the first 11 observations.
head(resid(model), n = 11)
## 1 2 3 4 5 6
## 20478.5081 14367.6419 8933.7758 3615.9097 -893.9565 -6074.8226
## 7 8 9 10 11
## -13484.6887 -13715.5548 -9976.4210 -4167.2871 5285.8468
# Estimated intercept and slope.
coef(model)
## (Intercept) datacovidjuli2021$Tanggal
## -2.086492e+08 1.286674e-01
# Store the model's residuals and fitted values as two new columns of the
# data, then display the augmented table.
datacovidjuli2021[["residuals"]] <- model[["residuals"]]
datacovidjuli2021[["predicted"]] <- model[["fitted.values"]]
print(datacovidjuli2021)
## # A tibble: 31 x 10
## Tanggal Sembuh retail_and_recr~ grocery_and_pha~ parks_percent_c~
## <dttm> <dbl> <dbl> <dbl> <dbl>
## 1 2021-07-01 00:00:00 468461 -42 -5 -67
## 2 2021-07-02 00:00:00 473467 -32 2 -51
## 3 2021-07-03 00:00:00 479150 -34 -1 -53
## 4 2021-07-04 00:00:00 484949 -34 -2 -52
## 5 2021-07-05 00:00:00 491556 -37 -6 -55
## 6 2021-07-06 00:00:00 497492 -31 2 -45
## 7 2021-07-07 00:00:00 501199 -36 2 -56
## 8 2021-07-08 00:00:00 512085 -39 -5 -63
## 9 2021-07-09 00:00:00 526941 -32 -2 -50
## 10 2021-07-10 00:00:00 543867 -35 -6 -55
## # ... with 21 more rows, and 5 more variables:
## # transit_stations_percent_change_from_baseline <dbl>,
## # workplaces_percent_change_from_baseline <dbl>,
## # residential_percent_change_from_baseline <dbl>, residuals <dbl>,
## # predicted <dbl>
# Recoveries against date with a smooth curve added; the title follows the
# response ~ predictor convention (y = Sembuh, x = Tanggal).
scatter.smooth(x = datacovidjuli2021$Tanggal, y = datacovidjuli2021$Sembuh,
               main = "Sembuh ~ Tanggal")
# Box plot of recoveries. Outlier values go into the subtitle via `sub`;
# passed as an unnamed argument they would be drawn as a second data group.
boxplot(datacovidjuli2021$Sembuh, main = "Sembuh",
        sub = paste("Outlier:",
                    toString(boxplot.stats(datacovidjuli2021$Sembuh)$out)))
# Kernel density estimate of recoveries; the y axis is a density, not a
# frequency, and Sembuh is the recovery count rather than a mobility index.
plot(density(datacovidjuli2021$Sembuh), main = "Density Plot: Sembuh",
     ylab = "Density")
# Re-plot the data with the fitted line and annotate it with the equation.
coefs <- coef(model)
plot(Sembuh ~ Tanggal, data = datacovidjuli2021)
abline(coefs)
# The annotation is anchored to the top-left corner of the plot region:
# user coordinates such as (12, 10) fall far outside the plotted range
# (date-times on x, recoveries of several hundred thousand on y), so text
# placed there is never visible. The slope multiplies the predictor Tanggal.
legend("topleft", bty = "n",
       legend = paste('expression = ', round(coefs[1], 2), '+',
                      round(coefs[2], 2), '*Tanggal'))
# Pearson correlation test: retail/recreation mobility vs. recoveries.
cor.test(
  x = datacovidjuli2021$retail_and_recreation_percent_change_from_baseline,
  y = datacovidjuli2021$Sembuh,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: datacovidjuli2021$retail_and_recreation_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = 6.7568, df = 29, p-value = 2.045e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.5916026 0.8897917
## sample estimates:
## cor
## 0.7820109
# Pearson correlation test: grocery/pharmacy mobility vs. recoveries.
cor.test(
  x = datacovidjuli2021$grocery_and_pharmacy_percent_change_from_baseline,
  y = datacovidjuli2021$Sembuh,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: datacovidjuli2021$grocery_and_pharmacy_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = 1.4646, df = 29, p-value = 0.1538
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1013233 0.5643032
## sample estimates:
## cor
## 0.2624391
# Pearson correlation test: parks mobility vs. recoveries.
cor.test(
  x = datacovidjuli2021$parks_percent_change_from_baseline,
  y = datacovidjuli2021$Sembuh,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: datacovidjuli2021$parks_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = 3.2723, df = 29, p-value = 0.002757
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2021467 0.7378604
## sample estimates:
## cor
## 0.5192907
# Pearson correlation test: transit-station mobility vs. recoveries.
cor.test(
  x = datacovidjuli2021$transit_stations_percent_change_from_baseline,
  y = datacovidjuli2021$Sembuh,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: datacovidjuli2021$transit_stations_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = 4.1072, df = 29, p-value = 0.0002988
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3210902 0.7908358
## sample estimates:
## cor
## 0.6064333
# Pearson correlation test: workplace mobility vs. recoveries.
cor.test(
  x = datacovidjuli2021$workplaces_percent_change_from_baseline,
  y = datacovidjuli2021$Sembuh,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: datacovidjuli2021$workplaces_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = 1.2263, df = 29, p-value = 0.23
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1436114 0.5343298
## sample estimates:
## cor
## 0.2220271
# Pearson correlation test: residential mobility vs. recoveries.
cor.test(
  x = datacovidjuli2021$residential_percent_change_from_baseline,
  y = datacovidjuli2021$Sembuh,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: datacovidjuli2021$residential_percent_change_from_baseline and datacovidjuli2021$Sembuh
## t = -2.4253, df = 29, p-value = 0.02175
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.66781095 -0.06588915
## sample estimates:
## cor
## -0.4106419
Sumber:
https://rpubs.com/suhartono-uinmaliki/877449
# NOTE(review): summarises a `cars` dataset (columns speed/dist), which is
# unrelated to the Covid-19 analysis above; it looks like a leftover template
# chunk. Confirm whether it should be removed.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00