library(readxl)
# Read the three Excel workbooks used by this analysis.
mobindexjkt <- read_excel(path = "mobindex_jkt.xlsx")
covidjkt <- read_excel(path = "c19data_jkt.xlsx")
Datagab <- read_excel(path = "datacovmob.xlsx")

# Mobility series: one vector per "*_percent_change_from_baseline" column,
# plus the matching date column.
retail <- mobindexjkt[["retail_and_recreation_percent_change_from_baseline"]]
grocery <- mobindexjkt[["grocery_and_pharmacy_percent_change_from_baseline"]]
park <- mobindexjkt[["parks_percent_change_from_baseline"]]
station <- mobindexjkt[["transit_stations_percent_change_from_baseline"]]
workplace <- mobindexjkt[["workplaces_percent_change_from_baseline"]]
residental <- mobindexjkt[["residential_percent_change_from_baseline"]]
date <- mobindexjkt[["date"]]

# COVID-19 case series taken from the case workbook.
Positif <- covidjkt[["Positif"]]
Dirawat <- covidjkt[["Dirawat"]]
Sembuh <- covidjkt[["Sembuh"]]
Meninggal <- covidjkt[["Meninggal"]]
Isoman <- covidjkt[["SelfIsolation"]]
# Correlation between the Dirawat (hospitalised) case series and each mobility
# series. The "##" lines are the console output recorded for each call.
# Dirawat vs retail
cor(x = Dirawat, y = retail)
## [1] 0.183887
# Dirawat vs grocery
cor(x = Dirawat, y = grocery)
## [1] 0.3371597
# Dirawat vs park
cor(x = Dirawat, y = park)
## [1] -0.4263309
# Dirawat vs station
cor(x = Dirawat, y = station)
## [1] -0.07740459
# Dirawat vs workplace
cor(x = Dirawat, y = workplace)
## [1] 0.03101298
# Dirawat vs residental
cor(x = Dirawat, y = residental)
## [1] -0.07503062
# Per-column summary of the COVID-19 case table.
summary(object = covidjkt)
## Tanggal Positif Dirawat
## Min. :2021-10-01 00:00:00 Min. :857916 Min. :256.0
## 1st Qu.:2021-10-08 12:00:00 1st Qu.:858971 1st Qu.:289.5
## Median :2021-10-16 00:00:00 Median :860014 Median :348.0
## Mean :2021-10-16 00:00:00 Mean :859866 Mean :357.8
## 3rd Qu.:2021-10-23 12:00:00 3rd Qu.:860815 3rd Qu.:437.0
## Max. :2021-10-31 00:00:00 Max. :861540 Max. :504.0
## Sembuh Meninggal SelfIsolation
## Min. :842715 Min. :13524 Min. : 618.0
## 1st Qu.:843780 1st Qu.:13542 1st Qu.: 849.0
## Median :845121 Median :13551 Median : 978.0
## Mean :844977 Mean :13549 Mean : 983.1
## 3rd Qu.:846121 3rd Qu.:13556 3rd Qu.:1202.0
## Max. :847021 Max. :13562 Max. :1285.0
# Plot every mobility series against date in a single chart.
# First gather the mobility vectors into one data frame keyed by date.
ds <- data.frame(date, retail, grocery, park, station, workplace, residental)
library(ggplot2)
library(reshape2)
# Reshape to long format (date, variable, value) so that each mobility
# category is drawn as its own coloured series.
dataku <- melt(data = ds, id.vars = "date")
ggplot(data = dataku, aes(x = date, y = value, colour = variable)) +
  geom_point() +
  geom_line() +
  labs(
    title = "Grafik Google Mobility Index",
    subtitle = "Propinsi DKI Jakarta Indonesia Okt 2021",
    # Axis label typo fixed ("Moility" -> "Mobility").
    y = "Index Mobility", x = "Tanggal"
  ) +
  # Legend anchored at the top; date labels rotated to avoid overlap.
  theme(
    legend.justification = "top",
    axis.text.x = element_text(angle = -90)
  )
# Preview the first rows of the combined case/mobility table.
head(x = Datagab)
## # A tibble: 6 x 11
## Tanggal Positif Dirawat Sembuh Meninggal Retail Grocery Parks
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2021-10-01 00:00:00 857916 504 842715 13524 -9 16 -29
## 2 2021-10-02 00:00:00 858071 483 842842 13529 -11 17 -32
## 3 2021-10-03 00:00:00 858198 500 842929 13534 -13 11 -39
## 4 2021-10-04 00:00:00 858347 461 843091 13534 -17 5 -39
## 5 2021-10-05 00:00:00 858419 444 843239 13538 -15 9 -39
## 6 2021-10-06 00:00:00 858622 451 843414 13540 -15 6 -35
## # ... with 3 more variables: Transit <dbl>, Workplaces <dbl>, Residential <dbl>
# Overlay Dirawat (divided by 10 to bring it near the mobility scale) and the
# workplace series on one time axis; the secondary axis mirrors the primary
# one (identity transform) and is labelled "workplace".
# NOTE(review): the printed preview lists a `Workplaces` column, not
# `workplace`, so `workplace` presumably resolves to the global vector
# created earlier in the script -- confirm.
p <- ggplot(Datagab, aes(x = Tanggal)) +
  geom_line(aes(y = Dirawat / 10, colour = "dirawat")) +
  geom_line(aes(y = workplace, colour = "workplace")) +
  scale_y_continuous(sec.axis = sec_axis(~ . * 1, name = "workplace"))
p
# Ordinary least-squares fit of Dirawat on the workplace mobility series,
# followed by its coefficient summary and ANOVA table (recorded output below).
model <- lm(formula = Dirawat ~ workplace)
summary(object = model)
##
## Call:
## lm(formula = Dirawat ~ workplace)
##
## Residuals:
## Min 1Q Median 3Q Max
## -101.21 -71.27 -12.77 78.01 147.03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 363.6099 37.9072 9.592 1.68e-10 ***
## workplace 0.2371 1.4190 0.167 0.868
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 82.05 on 29 degrees of freedom
## Multiple R-squared: 0.0009618, Adjusted R-squared: -0.03349
## F-statistic: 0.02792 on 1 and 29 DF, p-value: 0.8685
anova(object = model)
## Analysis of Variance Table
##
## Response: Dirawat
## Df Sum Sq Mean Sq F value Pr(>F)
## workplace 1 188 188 0.0279 0.8685
## Residuals 29 195227 6732
# Time plot of the Dirawat series. Columns are referenced by bare name because
# `data = covidjkt` already supplies them; this also yields clean axis labels
# ("Tanggal", "Dirawat") instead of "covidjkt$Tanggal" / "covidjkt$Dirawat".
plot(
  Dirawat ~ Tanggal,
  data = covidjkt,
  col = "dodgerblue", pch = 20, cex = 1.5,
  main = "Kasus Dirawat Covid 19"
)
# Cook's distance per observation, to spot influential points.
plot(cooks.distance(model), pch = 16, col = "blue")
# Standard lm diagnostic plots.
plot(model)
# Information criteria of the fitted model (recorded output below).
AIC(model)
## [1] 365.1601
BIC(model)
## [1] 369.4621
# Fitted values for every observation. Printed in full rather than through
# head(..., n = 31), so the listing no longer depends on a hard-coded row
# count (output is identical for the 31-row data recorded below).
predict(model)
## 1 2 3 4 5 6 7 8
## 356.9711 359.5792 360.5276 356.4969 356.2598 356.4969 356.7340 356.9711
## 9 10 11 12 13 14 15 16
## 359.8163 361.0018 356.9711 356.4969 356.9711 356.9711 357.4453 359.8163
## 17 18 19 20 21 22 23 24
## 360.7647 356.2598 356.0227 348.1984 356.9711 357.4453 360.2905 361.2389
## 25 26 27 28 29 30 31
## 357.2082 356.9711 357.2082 357.2082 357.9195 360.2905 361.4760
# Quick look at the first ten fitted values only.
plot(head(predict(model), n = 10))
# Residuals for every observation (same reasoning: no hard-coded row count).
resid(model)
## 1 2 3 4 5 6
## 147.028887 123.420787 139.472386 104.503087 87.740188 94.503087
## 7 8 9 10 11 12
## 91.265987 79.028887 67.183686 76.998186 29.028887 -46.496913
## 13 14 15 16 17 18
## 2.028887 12.028887 21.554687 -9.816314 -12.764714 -26.259812
## 19 20 21 22 23 24
## -25.022712 -36.198410 -56.971113 -56.445313 -68.290514 -74.238914
## 25 26 27 28 29 30
## -78.208213 -100.971113 -97.208213 -101.208213 -98.919513 -96.290514
## 31
## -90.476014
# Estimated intercept and slope.
coef(model)
## (Intercept) workplace
## 363.6099147 0.2371001
# Attach the model's residuals and fitted values to the combined table as two
# new columns, then print the table (recorded output below).
# NOTE(review): this relies on Datagab rows being in the same order as the
# vectors the model was fitted on -- confirm.
Datagab[["residuals"]] <- model[["residuals"]]
Datagab[["predicted"]] <- model[["fitted.values"]]
Datagab
## # A tibble: 31 x 13
## Tanggal Positif Dirawat Sembuh Meninggal Retail Grocery Parks
## <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2021-10-01 00:00:00 857916 504 842715 13524 -9 16 -29
## 2 2021-10-02 00:00:00 858071 483 842842 13529 -11 17 -32
## 3 2021-10-03 00:00:00 858198 500 842929 13534 -13 11 -39
## 4 2021-10-04 00:00:00 858347 461 843091 13534 -17 5 -39
## 5 2021-10-05 00:00:00 858419 444 843239 13538 -15 9 -39
## 6 2021-10-06 00:00:00 858622 451 843414 13540 -15 6 -35
## 7 2021-10-07 00:00:00 858771 448 843529 13541 -13 7 -32
## 8 2021-10-08 00:00:00 858921 436 843738 13541 -10 9 -25
## 9 2021-10-09 00:00:00 859021 427 843822 13543 -12 12 -30
## 10 2021-10-10 00:00:00 859161 438 843891 13547 -14 8 -35
## # ... with 21 more rows, and 5 more variables: Transit <dbl>, Workplaces <dbl>,
## # Residential <dbl>, residuals <dbl>, predicted <dbl>
# Scatter plot of Dirawat over time with a smoothed trend line. The title
# follows formula convention (response ~ predictor).
scatter.smooth(
  x = Datagab$Tanggal, y = Datagab$Dirawat,
  main = "Dirawat ~ Tanggal"
)
# Box plot of Dirawat. The outlying values reported by boxplot.stats() are
# shown in the subtitle; passing them as an unnamed argument would make
# boxplot() draw them as a second group instead.
boxplot(
  Datagab$Dirawat,
  main = "Dirawat",
  sub = paste("Outliers:", toString(boxplot.stats(Datagab$Dirawat)$out))
)
# Fuzzy linear regression of Dirawat on workplace using the fuzzyreg package.
# library() is used instead of require(): it stops immediately when the
# package is missing, whereas require() only returns FALSE and lets the script
# fail later at fuzzylm().
library(fuzzyreg)
# NOTE(review): the visible Datagab columns include `Workplaces`, not
# `workplace`, so the predictor presumably resolves to the global `workplace`
# vector created earlier in the script -- confirm.
f <- fuzzylm(Dirawat ~ workplace, data = Datagab)
plot(f, res = 20, col = "lightblue", main = "PLRLS")
# Model summary (recorded output below).
summary(f)
##
## Central tendency of the fuzzy regression model:
## Dirawat = 363.61 + 0.2371 * workplace
##
## Lower boundary of the model support interval:
## Dirawat = 272.6154 + -0.2667 * workplace
##
## Upper boundary of the model support interval:
## Dirawat = 496.5333 + 0.6154 * workplace
##
## The total error of fit: 3807220034
## The mean squared distance between response and prediction: 50115.57