#package
library (readr)
library (tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ purrr 1.0.1
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (DT)
library (DataExplorer)
library (Kendall)
library (vroom)
##
## Attaching package: 'vroom'
##
## The following objects are masked from 'package:readr':
##
## as.col_spec, col_character, col_date, col_datetime, col_double,
## col_factor, col_guess, col_integer, col_logical, col_number,
## col_skip, col_time, cols, cols_condense, cols_only, date_names,
## date_names_lang, date_names_langs, default_locale, fwf_cols,
## fwf_empty, fwf_positions, fwf_widths, locale, output_column,
## problems, spec
library (readxl)
library (MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library (ggplot2)
library (car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
#input data
data.poisson <- read_xlsx("Data Poisson.xlsx",sheet= "Data Kemiskinan")
data.poisson$Wilayah <- as.factor(data.poisson$Wilayah)
str(data.poisson)
## tibble [34 × 10] (S3: tbl_df/tbl/data.frame)
## $ Provinsi : chr [1:34] "ACEH" "SUMATERA UTARA" "SUMATERA BARAT" "RIAU" ...
## $ Penduduk.Miskin : num [1:34] 806 1268 335 485 279 ...
## $ Jumlah.Penduduk : num [1:34] 5388 14798 5546 6951 3604 ...
## $ Gini.Ratio : num [1:34] 0.348 0.343 0.329 0.363 0.351 0.358 0.37 0.341 0.248 0.342 ...
## $ Kepadatan : num [1:34] 92 205 133 75 72 93 102 262 90 258 ...
## $ KHL : num [1:34] 1732413 1271058 1474227 1872000 1708174 ...
## $ Proporsi.Lapangan.Kerja.Informal: num [1:34] 61.5 59.1 65.3 54 60 ...
## $ TPT : num [1:34] 5.97 5.47 6.17 4.4 4.7 4.74 3.39 4.31 4.18 8.02 ...
## $ IPM : num [1:34] 72.8 72.7 73.3 73.5 72.1 ...
## $ Wilayah : Factor w/ 3 levels "Barat","Tengah",..: 1 1 1 1 1 1 1 1 1 1 ...
data.poisson <- data.poisson[,-1]
str(data.poisson)
## tibble [34 × 9] (S3: tbl_df/tbl/data.frame)
## $ Penduduk.Miskin : num [1:34] 806 1268 335 485 279 ...
## $ Jumlah.Penduduk : num [1:34] 5388 14798 5546 6951 3604 ...
## $ Gini.Ratio : num [1:34] 0.348 0.343 0.329 0.363 0.351 0.358 0.37 0.341 0.248 0.342 ...
## $ Kepadatan : num [1:34] 92 205 133 75 72 93 102 262 90 258 ...
## $ KHL : num [1:34] 1732413 1271058 1474227 1872000 1708174 ...
## $ Proporsi.Lapangan.Kerja.Informal: num [1:34] 61.5 59.1 65.3 54 60 ...
## $ TPT : num [1:34] 5.97 5.47 6.17 4.4 4.7 4.74 3.39 4.31 4.18 8.02 ...
## $ IPM : num [1:34] 72.8 72.7 73.3 73.5 72.1 ...
## $ Wilayah : Factor w/ 3 levels "Barat","Tengah",..: 1 1 1 1 1 1 1 1 1 1 ...
#EDA
##summary Data
summary(data.poisson)
## Penduduk.Miskin Jumlah.Penduduk Gini.Ratio Kepadatan
## Min. : 49.0 Min. : 708.4 Min. :0.2480 Min. : 9.00
## 1st Qu.: 197.5 1st Qu.: 2360.3 1st Qu.:0.3357 1st Qu.: 54.25
## Median : 342.5 Median : 4093.9 Median :0.3495 Median : 103.50
## Mean : 769.0 Mean : 7929.5 Mean :0.3573 Mean : 744.26
## 3rd Qu.: 812.0 3rd Qu.: 8138.9 3rd Qu.:0.3880 3rd Qu.: 261.00
## Max. :4181.0 Max. :49565.2 Max. :0.4480 Max. :15978.00
## KHL Proporsi.Lapangan.Kerja.Informal TPT
## Min. : 825000 Min. :36.32 Min. :3.110
## 1st Qu.:1480618 1st Qu.:53.58 1st Qu.:3.985
## Median :1699587 Median :60.22 Median :4.775
## Mean :1710832 Mean :59.95 Mean :5.123
## 3rd Qu.:2014971 3rd Qu.:65.19 3rd Qu.:5.923
## Max. :2538174 Max. :84.11 Max. :8.530
## IPM Wilayah
## Min. :61.39 Barat :18
## 1st Qu.:70.23 Tengah:12
## Median :72.19 Timur : 4
## Mean :71.97
## 3rd Qu.:73.22
## Max. :81.65
#Penduduk Miskin
boxplot(data.poisson$Penduduk.Miskin,horizontal=TRUE)
#Gini Ratio
boxplot(data.poisson$Gini.Ratio,horizontal=TRUE)
boxplot(Penduduk.Miskin~Wilayah,data=data.poisson,horizontal=TRUE,
col =c ("lightcyan","lavender","mistyrose"),
main="Bpxplot Jumlah Penduduk dan Wilayah"
)
##Distribusi Poisson
hist(data.poisson$Penduduk.Miskin, probability=TRUE)
lines(density(data.poisson$Penduduk.Miskin),col = "red")
Histogram menunjukan data varibel respon berdistribusi poisson
Nilai lambda distribusi poisson = rata-rata
#Lambda
mean(data.poisson$Penduduk.Miskin)
## [1] 768.9706
#KS.Test
library(stats)
ks.test(data.poisson$Penduduk.Miskin, "rpois", lambda = 768.9706)
## Warning in ks.test.default(data.poisson$Penduduk.Miskin, "rpois", lambda =
## 768.9706): ties should not be present for the Kolmogorov-Smirnov test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: data.poisson$Penduduk.Miskin
## D = 828.68, p-value < 2.2e-16
## alternative hypothesis: two-sided
Uji kolmogorov-smirnov test signifikan, data variabelrespon berdistribusi poisson
#create Model
model.poisson<-glm(Penduduk.Miskin~., family = "poisson", data = data.poisson)
summary (model.poisson)
##
## Call:
## glm(formula = Penduduk.Miskin ~ ., family = "poisson", data = data.poisson)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -18.340 -8.162 -1.335 7.442 18.096
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.602e+00 4.597e-01 12.186 < 2e-16 ***
## Jumlah.Penduduk 4.437e-05 1.002e-06 44.301 < 2e-16 ***
## Gini.Ratio -1.897e+00 2.545e-01 -7.453 9.12e-14 ***
## Kepadatan 9.583e-05 4.460e-06 21.486 < 2e-16 ***
## KHL -5.572e-07 3.839e-08 -14.514 < 2e-16 ***
## Proporsi.Lapangan.Kerja.Informal 4.812e-02 1.795e-03 26.802 < 2e-16 ***
## TPT 5.791e-02 7.059e-03 8.204 2.33e-16 ***
## IPM -1.692e-02 4.836e-03 -3.498 0.000469 ***
## WilayahTengah -3.595e-01 2.224e-02 -16.163 < 2e-16 ***
## WilayahTimur -5.303e-01 4.069e-02 -13.033 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 32854.7 on 33 degrees of freedom
## Residual deviance: 3083.5 on 24 degrees of freedom
## AIC: 3370.7
##
## Number of Fisher Scoring iterations: 5
Residual deviance: 3083.5 on 24 degrees of freedom
3083.5/24
## [1] 128.4792
data variabel Y overdispersi
# Test overdispersi
library (AER)
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
dispersiontest(model.poisson)
##
## Overdispersion test
##
## data: model.poisson
## z = 5.0288, p-value = 2.467e-07
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 88.81769
nilai dispersion = 88.81769 (Overdispersi)
# Test overdispersi
library (DHARMa)
## This is DHARMa 0.4.6. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
sim.poisson <- simulateResiduals(model.poisson, refit = TRUE)
testOverdispersion(sim.poisson)
## testOverdispersion is deprecated, switch your code to using the testDispersion function
##
## DHARMa nonparametric dispersion test via mean deviance residual fitted
## vs. simulated-refitted
##
## data: simulationOutput
## dispersion = 128.89, p-value < 2.2e-16
## alternative hypothesis: two.sided
#Alternatif Model(Negatif Binominal)
model.nb <- glm.nb(Penduduk.Miskin~. ,data.poisson)
summary(model.nb)
##
## Call:
## glm.nb(formula = Penduduk.Miskin ~ ., data = data.poisson, init.theta = 5.242434309,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.25068 -0.74447 -0.06731 0.58807 1.70810
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.355e-01 4.334e+00 -0.054 0.9567
## Jumlah.Penduduk 5.135e-05 1.211e-05 4.239 2.24e-05 ***
## Gini.Ratio 8.191e-01 2.445e+00 0.335 0.7376
## Kepadatan 4.890e-05 4.335e-05 1.128 0.2593
## KHL -2.404e-07 3.732e-07 -0.644 0.5194
## Proporsi.Lapangan.Kerja.Informal 6.745e-02 1.717e-02 3.929 8.54e-05 ***
## TPT 1.890e-01 7.943e-02 2.379 0.0173 *
## IPM 1.702e-02 4.641e-02 0.367 0.7138
## WilayahTengah -3.769e-01 1.921e-01 -1.962 0.0497 *
## WilayahTimur -6.037e-01 3.591e-01 -1.681 0.0927 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(5.2424) family taken to be 1)
##
## Null deviance: 219.864 on 33 degrees of freedom
## Residual deviance: 35.077 on 24 degrees of freedom
## AIC: 474.46
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 5.24
## Std. Err.: 1.26
##
## 2 x log-likelihood: -452.465
35.077/24
## [1] 1.461542
#Alternatif Model(Negatif Binomial dengan seleksi variabel)
model.nb2 <- glm.nb(Penduduk.Miskin~Jumlah.Penduduk +Proporsi.Lapangan.Kerja.Informal +TPT+Wilayah, data.poisson)
summary(model.nb2)
##
## Call:
## glm.nb(formula = Penduduk.Miskin ~ Jumlah.Penduduk + Proporsi.Lapangan.Kerja.Informal +
## TPT + Wilayah, data = data.poisson, init.theta = 4.678079991,
## link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.02405 -0.85207 0.05513 0.65772 1.55880
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.476e+00 9.999e-01 1.476 0.13982
## Jumlah.Penduduk 5.949e-05 9.035e-06 6.585 4.56e-11 ***
## Proporsi.Lapangan.Kerja.Informal 5.853e-02 1.180e-02 4.961 7.02e-07 ***
## TPT 1.800e-01 8.316e-02 2.165 0.03039 *
## WilayahTengah -4.168e-01 1.951e-01 -2.136 0.03268 *
## WilayahTimur -8.259e-01 3.136e-01 -2.634 0.00845 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(4.6781) family taken to be 1)
##
## Null deviance: 196.429 on 33 degrees of freedom
## Residual deviance: 35.254 on 28 degrees of freedom
## AIC: 470.57
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 4.68
## Std. Err.: 1.12
##
## 2 x log-likelihood: -456.571
35.254/28
## [1] 1.259071
sim.nb2 <-simulateResiduals(model.nb2, refit = TRUE)
testOverdispersion(model.nb2)
## testOverdispersion is deprecated, switch your code to using the testDispersion function
##
## DHARMa nonparametric dispersion test via sd of residuals fitted vs.
## simulated
##
## data: simulationOutput
## dispersion = 1.1429, p-value = 0.568
## alternative hypothesis: two.sided
plotSimulatedResiduals(sim.poisson)
## plotSimulatedResiduals is deprecated, please switch your code to simply using the plot() function
AIC(model.poisson) #model Poisson
## [1] 3370.733
AIC(model.nb) #Model Negative Binomial dengan semua variabel
## [1] 474.4648
AIC(model.nb2) #Model Negative Binomial dengan seleksi variabel
## [1] 470.5705
prediksi.poisson <- predict(model.poisson, data = data, type = "response")
Prediksi.nb2 <- predict(model.nb2, data = data.poisson, type = "response")
prediksi.poisson
## 1 2 3 4 5 6 7 8
## 542.0508 942.3193 791.0109 329.0612 440.9737 562.9618 594.6896 1156.9733
## 9 10 11 12 13 14 15 16
## 233.5222 139.6040 514.0109 4761.3663 2989.0587 383.2262 4366.4813 588.0338
## 17 18 19 20 21 22 23 24
## 236.5721 744.8363 779.8757 541.0312 203.6248 278.7523 128.6405 155.3389
## 25 26 27 28 29 30 31 32
## 312.6241 486.3034 400.2148 299.6829 276.9495 456.2095 269.8443 252.9406
## 33 34
## 226.5107 759.7043
Prediksi.nb2
## 1 2 3 4 5 6 7 8
## 644.4161 896.8046 842.7956 345.0861 423.0661 695.8412 472.6613 1038.4354
## 9 10 11 12 13 14 15 16
## 185.1481 178.2976 300.1924 9178.1713 3292.0170 246.0006 4707.7758 725.0920
## 17 18 19 20 21 22 23 24
## 204.5427 656.5210 593.1264 443.5414 222.8670 226.6446 161.8953 121.6825
## 25 26 27 28 29 30 31 32
## 344.8523 356.3822 568.5619 261.6412 249.1101 504.1308 276.1942 235.7563
## 33 34
## 195.9503 616.0393
plot.prediksi <- data.frame(data.poisson$Penduduk.Miskin, prediksi.poisson, Prediksi.nb2)
colnames(plot.prediksi) <- c("Penduduk_Miskin", "Prediksi_Poisson", "Prediksi_nb2")
plot.prediksi
x <- 1:34
plot(x, plot.prediksi$Penduduk_Miskin,
type = "o",
xlab = "Amatan",
ylab = "Jumlah Penduduk Miskin",
main = "Plot Prediksi dan Data Asli"
)
lines (x,plot.prediksi$Prediksi_nb2,
type="l",
col = "red")
data.frame(model.nb2$coefficients, exp(model.nb2$coefficients))
Jumlah penduduk, exp(5.948951e-05) = 1.0000595 setiap peningkatan 1000 jumlah penduduk, akan meningkatkan jumlah penduduk miskin, 1.0000595 kali lipat
Proposi lapangan kerja informal, exp(5.852904e-02) = 1.06 setiap peningkatan 1% proposi lapangan kerja informal, akan meningkatkan jumlah penduduk miskin 1.06 kali lipat
wilayah tengah 0.6591, peluang daerah yang berada di wilayah tengah untuk mengalami peningkatan penduduk miskin adalah 0.6591 kali dibanding daerah yang berada diwilayah barat.
data.baru <- read_xlsx("Data Poisson.xlsx", sheet="Data Kemiskinan Baru")
data.baru$Wilayah <- as.factor(data.baru$Wilayah)
data.baru
predict(model.nb2, newdata = data.baru, type = "response")
## 1 2 3 4 5 6 7 8
## 780.7106 481.2674 1054.4532 674.3060 585.8197 365.6862 139.8690 238.2916
## 9 10
## 102.6512 433.0340