Melakukan Analisa menggunakan Regresi Linear Berganda Covid 19 di DKI Jakarta pada bulan September 2021 dengan memandingkan data dari Google Mobility Index dan Data Riwayat/Penyebaran Covid 19 DKI Jakarta untuk kasus POSITIF Covid 19
library(readxl)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.1.3
#baca data excel
mobindexjkt <- read_excel("mobindex_jkt.xlsx")
covidjkt <- read_excel("c19data_jkt.xlsx")
retail <- mobindexjkt$retail_and_recreation_percent_change_from_baseline
grocery <- mobindexjkt$grocery_and_pharmacy_percent_change_from_baseline
park <- mobindexjkt$parks_percent_change_from_baseline
station <- mobindexjkt$transit_stations_percent_change_from_baseline
workplace <- mobindexjkt$workplaces_percent_change_from_baseline
residental <- mobindexjkt$residential_percent_change_from_baseline
Tgl <- covidjkt$Tanggal
Positif <- covidjkt$Positif
Dirawat <- covidjkt$Dirawat
Sembuh <- covidjkt$Sembuh
Meninggal <- covidjkt$Meninggal
Isolasi_Mandiri <- covidjkt$SelfIsolation
dataku1 <- data.frame(Positif, retail, grocery, park, station, workplace, residental)
dataku2 <- melt(data = dataku1, id.vars = "Positif")
Inisialisasi Data per Kolom
#dataframe KASUS POSITIF
library(ggplot2)
library(reshape2)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
dataku3 <- data.frame(Tgl, retail, grocery, park, station, workplace, residental)
dataku4 <- melt(data = dataku3, id.vars = "Tgl")
ggplot(data = dataku4, aes(x = Tgl, y = value, colour = variable))+
geom_point() +
geom_line() +
theme(legend.justification = "top") +
labs(title = "Google Mobility Index Covid 19",
subtitle = "Propinsi DKI Jakarta Indonesia SEPT 2021",
y = "Mobility Index", x = "Tanggal") +
theme(axis.text.x = element_text(angle = -90))
Dari Gambar diatas mobiity index terbanyak pada RETAIL walaupun
cenderung turun, dikarenakan banyak mall yang menerapkan aturan ketat
atau bahkan ada yang tutup
dataku5 <- data.frame(Tgl, Positif)
dataku6 <- melt(data = dataku5, id.vars = "Tgl")
ggplot(data = dataku6, aes(x = Tgl, y = value, colour = variable))+
geom_point() +
geom_line() +
theme(legend.justification = "top") +
labs(title = "Jumlah Kasus Positif Covid 19",
subtitle = "Propinsi DKI Jakarta Indonesia SEPT 2021",
y = "Jumlah Kasus", x = "Tanggal") +
theme(axis.text.x = element_text(angle = -90))
Kasus Positif terus berkembang dari hari ke hari
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.1.3
p1=ggplot(dataku1, aes(x = Positif, y = retail)) +
geom_point() +
stat_smooth()
p2=ggplot(data = dataku1, aes(x = Positif, y = grocery)) +
geom_point() +
stat_smooth()
p3=ggplot(data = dataku1, aes(x = Positif, y = park)) +
geom_point() +
stat_smooth()
p4=ggplot(data = dataku1, aes(x = Positif, y = station)) +
geom_point() +
stat_smooth()
p5=ggplot(data = dataku1, aes(x = Positif, y = workplace)) +
geom_point() +
stat_smooth()
p6=ggplot(data = dataku1, aes(x = Positif, y = residental)) +
geom_point() +
stat_smooth()
gridExtra::grid.arrange(p1,p2,p3,p4, p5, p6, ncol=2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
model <- lm(Positif ~ retail+grocery+park+station+workplace+residental)
summary(model)
##
## Call:
## lm(formula = Positif ~ retail + grocery + park + station + workplace +
## residental)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1552.42 -399.60 -33.81 646.02 1192.06
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 879114.81 3089.35 284.563 < 2e-16 ***
## retail 771.83 156.99 4.917 5.74e-05 ***
## grocery -466.08 74.17 -6.284 2.06e-06 ***
## park -210.43 85.22 -2.469 0.02139 *
## station 426.70 138.44 3.082 0.00526 **
## workplace -64.36 71.57 -0.899 0.37781
## residental -136.46 289.40 -0.472 0.64169
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 815.3 on 23 degrees of freedom
## Multiple R-squared: 0.8597, Adjusted R-squared: 0.8231
## F-statistic: 23.49 on 6 and 23 DF, p-value: 9.935e-09
Digaram PLOT menunjukkan terjadinya angka positif bebentuk curva hampir 45 derajad pada retail, park dan station. Dari Analisa Linear Model R Square 80% Adjusted Squared 80% dan p value diatas 5 %, yang artinya tingkat errornya rendah. Dan untuk park/taman mempunyai nilai yang paling significant yaitu 0.02139
residu <- residuals(model)
qqnorm(residu)
Pada Diagram QQPLOT berbentuk curva naik 45 derajat artinya tingkat
error untuk analisa ini rendah
cor(dataku2$value, dataku2$Positif)
## [1] 0.0314721
anova(model)
## Analysis of Variance Table
##
## Response: Positif
## Df Sum Sq Mean Sq F value Pr(>F)
## retail 1 13699582 13699582 20.6119 0.0001465 ***
## grocery 1 41074068 41074068 61.7986 5.784e-08 ***
## park 1 307605 307605 0.4628 0.5031038
## station 1 37929330 37929330 57.0671 1.131e-07 ***
## workplace 1 517677 517677 0.7789 0.3866157
## residental 1 147787 147787 0.2224 0.6416942
## Residuals 23 15286813 664644
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(dataku2$Positif ~ dataku2$value, data = dataku1, col = "dodgerblue", pch = 20, cex = 1.5, main = "Mobility Index & Kasus Positif Covid 19 DKI JKT Sept 21")
abline(model) #Add a regression line
## Warning in abline(model): only using the first two of 7 regression coefficients
plot(cooks.distance(model), pch = 16, col = "blue") #Plot the Cooks Distances.
plot(model)
AIC(model)
## [1] 495.3754
BIC(model)
## [1] 506.585
head(predict(model), n = 30)
## 1 2 3 4 5 6 7 8
## 851702.8 852093.2 852069.7 852387.8 852739.9 852935.9 854147.1 855151.4
## 9 10 11 12 13 14 15 16
## 854422.8 854488.8 854785.5 854155.3 855818.8 854429.6 855800.8 854953.3
## 17 18 19 20 21 22 23 24
## 856007.3 855352.8 855004.3 855059.9 856195.5 856834.9 856470.0 856262.3
## 25 26 27 28 29 30
## 858119.2 858559.6 856691.1 856499.9 857349.6 856866.7
plot(head(predict(model), n = 30))
head(resid(model), n = 30)
## 1 2 3 4 5 6
## -446.843832 -407.194850 -40.721194 1.215545 -47.872286 -26.897950
## 7 8 9 10 11 12
## -976.148348 -1552.416916 -515.808175 -320.798402 -349.509578 586.654798
## 13 14 15 16 17 18
## -889.837939 689.390895 -376.811364 646.746701 -201.347333 653.239847
## 19 20 21 22 23 24
## 1156.747600 1192.060377 162.542453 -249.928985 279.987424 668.680647
## 25 26 27 28 29 30
## -1055.239876 -1327.567515 643.854243 939.149486 266.370710 898.303818
coef(model)
## (Intercept) retail grocery park station workplace
## 879114.81268 771.82559 -466.08349 -210.42662 426.70483 -64.36163
## residental
## -136.46322
dataku1$residuals <- model$residuals
dataku1$predicted <- model$fitted.values
dataku1
## Positif retail grocery park station workplace residental residuals
## 1 851256 -21 8 -45 -41 -34 12 -446.843832
## 2 851686 -20 7 -43 -42 -34 12 -407.194850
## 3 852029 -18 8 -37 -41 -32 13 -40.721194
## 4 852389 -21 9 -39 -35 -19 6 1.215545
## 5 852692 -24 3 -46 -38 -14 6 -47.872286
## 6 852909 -21 4 -42 -41 -34 12 -26.897950
## 7 853171 -24 -1 -46 -40 -35 13 -976.148348
## 8 853599 -24 -3 -46 -40 -34 12 -1552.416916
## 9 853907 -20 4 -41 -39 -33 11 -515.808175
## 10 854168 -17 6 -36 -39 -31 12 -320.798402
## 11 854436 -21 6 -40 -33 -18 6 -349.509578
## 12 854742 -25 0 -46 -36 -13 6 586.654798
## 13 854929 -21 1 -45 -39 -34 12 -889.837939
## 14 855119 -28 -8 -55 -44 -38 15 689.390895
## 15 855424 -20 3 -43 -38 -34 11 -376.811364
## 16 855600 -20 3 -41 -39 -34 11 646.746701
## 17 855806 -16 5 -35 -38 -32 12 -201.347333
## 18 856006 -20 6 -37 -32 -18 6 653.239847
## 19 856161 -28 -5 -50 -36 -15 7 1156.747600
## 20 856252 -20 4 -42 -38 -33 11 1192.060377
## 21 856358 -22 -1 -44 -38 -34 12 162.542453
## 22 856585 -20 -1 -42 -39 -31 11 -249.928985
## 23 856750 -19 1 -39 -38 -31 11 279.987424
## 24 856931 -15 6 -33 -37 -29 11 668.680647
## 25 857064 -16 9 -36 -29 -16 5 -1055.239876
## 26 857232 -20 1 -43 -32 -12 6 -1327.567515
## 27 857335 -18 4 -41 -37 -31 11 643.854243
## 28 857439 -18 5 -42 -37 -32 11 939.149486
## 29 857616 -17 5 -40 -36 -31 10 266.370710
## 30 857765 -15 10 -38 -34 -29 10 898.303818
## predicted
## 1 851702.8
## 2 852093.2
## 3 852069.7
## 4 852387.8
## 5 852739.9
## 6 852935.9
## 7 854147.1
## 8 855151.4
## 9 854422.8
## 10 854488.8
## 11 854785.5
## 12 854155.3
## 13 855818.8
## 14 854429.6
## 15 855800.8
## 16 854953.3
## 17 856007.3
## 18 855352.8
## 19 855004.3
## 20 855059.9
## 21 856195.5
## 22 856834.9
## 23 856470.0
## 24 856262.3
## 25 858119.2
## 26 858559.6
## 27 856691.1
## 28 856499.9
## 29 857349.6
## 30 856866.7
scatter.smooth(x=dataku2$value, y=dataku2$Positif, main="Mobiliy Index ~ Positif")
boxplot(dataku2$value, main="Covid DKI JKT Sept 21", boxplot.stats(dataku2$value)$out)
plot(density(dataku2$value), main="Data Covid DKI JKT Sept 21", ylab="Frequency")