#package

library (readr)
library (tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ purrr     1.0.1
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (DT)
library (DataExplorer)
library (Kendall)
library (vroom)
## 
## Attaching package: 'vroom'
## 
## The following objects are masked from 'package:readr':
## 
##     as.col_spec, col_character, col_date, col_datetime, col_double,
##     col_factor, col_guess, col_integer, col_logical, col_number,
##     col_skip, col_time, cols, cols_condense, cols_only, date_names,
##     date_names_lang, date_names_langs, default_locale, fwf_cols,
##     fwf_empty, fwf_positions, fwf_widths, locale, output_column,
##     problems, spec
library (readxl)
library (MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library (ggplot2)
library (car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

#input data

data.poisson <- read_xlsx("Data Poisson.xlsx",sheet= "Data Kemiskinan")
data.poisson$Wilayah <- as.factor(data.poisson$Wilayah)

str(data.poisson)
## tibble [34 × 10] (S3: tbl_df/tbl/data.frame)
##  $ Provinsi                        : chr [1:34] "ACEH" "SUMATERA UTARA" "SUMATERA BARAT" "RIAU" ...
##  $ Penduduk.Miskin                 : num [1:34] 806 1268 335 485 279 ...
##  $ Jumlah.Penduduk                 : num [1:34] 5388 14798 5546 6951 3604 ...
##  $ Gini.Ratio                      : num [1:34] 0.348 0.343 0.329 0.363 0.351 0.358 0.37 0.341 0.248 0.342 ...
##  $ Kepadatan                       : num [1:34] 92 205 133 75 72 93 102 262 90 258 ...
##  $ KHL                             : num [1:34] 1732413 1271058 1474227 1872000 1708174 ...
##  $ Proporsi.Lapangan.Kerja.Informal: num [1:34] 61.5 59.1 65.3 54 60 ...
##  $ TPT                             : num [1:34] 5.97 5.47 6.17 4.4 4.7 4.74 3.39 4.31 4.18 8.02 ...
##  $ IPM                             : num [1:34] 72.8 72.7 73.3 73.5 72.1 ...
##  $ Wilayah                         : Factor w/ 3 levels "Barat","Tengah",..: 1 1 1 1 1 1 1 1 1 1 ...
data.poisson <- data.poisson[,-1]
str(data.poisson)
## tibble [34 × 9] (S3: tbl_df/tbl/data.frame)
##  $ Penduduk.Miskin                 : num [1:34] 806 1268 335 485 279 ...
##  $ Jumlah.Penduduk                 : num [1:34] 5388 14798 5546 6951 3604 ...
##  $ Gini.Ratio                      : num [1:34] 0.348 0.343 0.329 0.363 0.351 0.358 0.37 0.341 0.248 0.342 ...
##  $ Kepadatan                       : num [1:34] 92 205 133 75 72 93 102 262 90 258 ...
##  $ KHL                             : num [1:34] 1732413 1271058 1474227 1872000 1708174 ...
##  $ Proporsi.Lapangan.Kerja.Informal: num [1:34] 61.5 59.1 65.3 54 60 ...
##  $ TPT                             : num [1:34] 5.97 5.47 6.17 4.4 4.7 4.74 3.39 4.31 4.18 8.02 ...
##  $ IPM                             : num [1:34] 72.8 72.7 73.3 73.5 72.1 ...
##  $ Wilayah                         : Factor w/ 3 levels "Barat","Tengah",..: 1 1 1 1 1 1 1 1 1 1 ...

#EDA

##summary Data

summary(data.poisson)
##  Penduduk.Miskin  Jumlah.Penduduk     Gini.Ratio       Kepadatan       
##  Min.   :  49.0   Min.   :  708.4   Min.   :0.2480   Min.   :    9.00  
##  1st Qu.: 197.5   1st Qu.: 2360.3   1st Qu.:0.3357   1st Qu.:   54.25  
##  Median : 342.5   Median : 4093.9   Median :0.3495   Median :  103.50  
##  Mean   : 769.0   Mean   : 7929.5   Mean   :0.3573   Mean   :  744.26  
##  3rd Qu.: 812.0   3rd Qu.: 8138.9   3rd Qu.:0.3880   3rd Qu.:  261.00  
##  Max.   :4181.0   Max.   :49565.2   Max.   :0.4480   Max.   :15978.00  
##       KHL          Proporsi.Lapangan.Kerja.Informal      TPT       
##  Min.   : 825000   Min.   :36.32                    Min.   :3.110  
##  1st Qu.:1480618   1st Qu.:53.58                    1st Qu.:3.985  
##  Median :1699587   Median :60.22                    Median :4.775  
##  Mean   :1710832   Mean   :59.95                    Mean   :5.123  
##  3rd Qu.:2014971   3rd Qu.:65.19                    3rd Qu.:5.923  
##  Max.   :2538174   Max.   :84.11                    Max.   :8.530  
##       IPM          Wilayah  
##  Min.   :61.39   Barat :18  
##  1st Qu.:70.23   Tengah:12  
##  Median :72.19   Timur : 4  
##  Mean   :71.97              
##  3rd Qu.:73.22              
##  Max.   :81.65

Boxplot

#Penduduk Miskin
boxplot(data.poisson$Penduduk.Miskin,horizontal=TRUE)

#Gini Ratio
boxplot(data.poisson$Gini.Ratio,horizontal=TRUE)

boxplot(Penduduk.Miskin~Wilayah,data=data.poisson,horizontal=TRUE, 
        col =c ("lightcyan","lavender","mistyrose"),
        main="Bpxplot Jumlah Penduduk dan Wilayah"
        )

##Distribusi Poisson

hist(data.poisson$Penduduk.Miskin, probability=TRUE)
lines(density(data.poisson$Penduduk.Miskin),col = "red")

Histogram menunjukan data varibel respon berdistribusi poisson

Nilai lambda distribusi poisson = rata-rata

#Lambda

mean(data.poisson$Penduduk.Miskin)
## [1] 768.9706
#KS.Test

library(stats)
ks.test(data.poisson$Penduduk.Miskin, "rpois", lambda = 768.9706)
## Warning in ks.test.default(data.poisson$Penduduk.Miskin, "rpois", lambda =
## 768.9706): ties should not be present for the Kolmogorov-Smirnov test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  data.poisson$Penduduk.Miskin
## D = 828.68, p-value < 2.2e-16
## alternative hypothesis: two-sided

Uji kolmogorov-smirnov test signifikan, data variabelrespon berdistribusi poisson

#create Model

model.poisson<-glm(Penduduk.Miskin~., family = "poisson", data = data.poisson)
summary (model.poisson)
## 
## Call:
## glm(formula = Penduduk.Miskin ~ ., family = "poisson", data = data.poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -18.340   -8.162   -1.335    7.442   18.096  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       5.602e+00  4.597e-01  12.186  < 2e-16 ***
## Jumlah.Penduduk                   4.437e-05  1.002e-06  44.301  < 2e-16 ***
## Gini.Ratio                       -1.897e+00  2.545e-01  -7.453 9.12e-14 ***
## Kepadatan                         9.583e-05  4.460e-06  21.486  < 2e-16 ***
## KHL                              -5.572e-07  3.839e-08 -14.514  < 2e-16 ***
## Proporsi.Lapangan.Kerja.Informal  4.812e-02  1.795e-03  26.802  < 2e-16 ***
## TPT                               5.791e-02  7.059e-03   8.204 2.33e-16 ***
## IPM                              -1.692e-02  4.836e-03  -3.498 0.000469 ***
## WilayahTengah                    -3.595e-01  2.224e-02 -16.163  < 2e-16 ***
## WilayahTimur                     -5.303e-01  4.069e-02 -13.033  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 32854.7  on 33  degrees of freedom
## Residual deviance:  3083.5  on 24  degrees of freedom
## AIC: 3370.7
## 
## Number of Fisher Scoring iterations: 5

Equidispersi test

Residual deviance: 3083.5 on 24 degrees of freedom

3083.5/24
## [1] 128.4792

data variabel Y overdispersi

# Test overdispersi

library (AER)
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
dispersiontest(model.poisson)
## 
##  Overdispersion test
## 
## data:  model.poisson
## z = 5.0288, p-value = 2.467e-07
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   88.81769

nilai dispersion = 88.81769 (Overdispersi)

# Test overdispersi

library (DHARMa)
## This is DHARMa 0.4.6. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
sim.poisson <- simulateResiduals(model.poisson, refit = TRUE)
testOverdispersion(sim.poisson)
## testOverdispersion is deprecated, switch your code to using the testDispersion function

## 
##  DHARMa nonparametric dispersion test via mean deviance residual fitted
##  vs. simulated-refitted
## 
## data:  simulationOutput
## dispersion = 128.89, p-value < 2.2e-16
## alternative hypothesis: two.sided

#Alternatif Model(Negatif Binominal)

model.nb <- glm.nb(Penduduk.Miskin~. ,data.poisson)
summary(model.nb)
## 
## Call:
## glm.nb(formula = Penduduk.Miskin ~ ., data = data.poisson, init.theta = 5.242434309, 
##     link = log)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.25068  -0.74447  -0.06731   0.58807   1.70810  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      -2.355e-01  4.334e+00  -0.054   0.9567    
## Jumlah.Penduduk                   5.135e-05  1.211e-05   4.239 2.24e-05 ***
## Gini.Ratio                        8.191e-01  2.445e+00   0.335   0.7376    
## Kepadatan                         4.890e-05  4.335e-05   1.128   0.2593    
## KHL                              -2.404e-07  3.732e-07  -0.644   0.5194    
## Proporsi.Lapangan.Kerja.Informal  6.745e-02  1.717e-02   3.929 8.54e-05 ***
## TPT                               1.890e-01  7.943e-02   2.379   0.0173 *  
## IPM                               1.702e-02  4.641e-02   0.367   0.7138    
## WilayahTengah                    -3.769e-01  1.921e-01  -1.962   0.0497 *  
## WilayahTimur                     -6.037e-01  3.591e-01  -1.681   0.0927 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(5.2424) family taken to be 1)
## 
##     Null deviance: 219.864  on 33  degrees of freedom
## Residual deviance:  35.077  on 24  degrees of freedom
## AIC: 474.46
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  5.24 
##           Std. Err.:  1.26 
## 
##  2 x log-likelihood:  -452.465
35.077/24
## [1] 1.461542

#Alternatif Model(Negatif Binomial dengan seleksi variabel)

model.nb2 <- glm.nb(Penduduk.Miskin~Jumlah.Penduduk +Proporsi.Lapangan.Kerja.Informal +TPT+Wilayah, data.poisson)
summary(model.nb2)
## 
## Call:
## glm.nb(formula = Penduduk.Miskin ~ Jumlah.Penduduk + Proporsi.Lapangan.Kerja.Informal + 
##     TPT + Wilayah, data = data.poisson, init.theta = 4.678079991, 
##     link = log)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.02405  -0.85207   0.05513   0.65772   1.55880  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       1.476e+00  9.999e-01   1.476  0.13982    
## Jumlah.Penduduk                   5.949e-05  9.035e-06   6.585 4.56e-11 ***
## Proporsi.Lapangan.Kerja.Informal  5.853e-02  1.180e-02   4.961 7.02e-07 ***
## TPT                               1.800e-01  8.316e-02   2.165  0.03039 *  
## WilayahTengah                    -4.168e-01  1.951e-01  -2.136  0.03268 *  
## WilayahTimur                     -8.259e-01  3.136e-01  -2.634  0.00845 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(4.6781) family taken to be 1)
## 
##     Null deviance: 196.429  on 33  degrees of freedom
## Residual deviance:  35.254  on 28  degrees of freedom
## AIC: 470.57
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  4.68 
##           Std. Err.:  1.12 
## 
##  2 x log-likelihood:  -456.571
35.254/28
## [1] 1.259071

Plot Residual model negatif binomial 2

sim.nb2 <-simulateResiduals(model.nb2, refit = TRUE)
testOverdispersion(model.nb2)
## testOverdispersion is deprecated, switch your code to using the testDispersion function

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.1429, p-value = 0.568
## alternative hypothesis: two.sided

Plot Residual

plotSimulatedResiduals(sim.poisson)
## plotSimulatedResiduals is deprecated, please switch your code to simply using the plot() function

perbandingan nilai AIC

AIC(model.poisson) #model Poisson
## [1] 3370.733
AIC(model.nb) #Model Negative Binomial dengan semua variabel
## [1] 474.4648
AIC(model.nb2) #Model Negative Binomial dengan seleksi variabel
## [1] 470.5705

Plot Prediksi

prediksi.poisson <- predict(model.poisson, data = data, type = "response")
Prediksi.nb2 <- predict(model.nb2, data = data.poisson, type = "response")
prediksi.poisson
##         1         2         3         4         5         6         7         8 
##  542.0508  942.3193  791.0109  329.0612  440.9737  562.9618  594.6896 1156.9733 
##         9        10        11        12        13        14        15        16 
##  233.5222  139.6040  514.0109 4761.3663 2989.0587  383.2262 4366.4813  588.0338 
##        17        18        19        20        21        22        23        24 
##  236.5721  744.8363  779.8757  541.0312  203.6248  278.7523  128.6405  155.3389 
##        25        26        27        28        29        30        31        32 
##  312.6241  486.3034  400.2148  299.6829  276.9495  456.2095  269.8443  252.9406 
##        33        34 
##  226.5107  759.7043
Prediksi.nb2
##         1         2         3         4         5         6         7         8 
##  644.4161  896.8046  842.7956  345.0861  423.0661  695.8412  472.6613 1038.4354 
##         9        10        11        12        13        14        15        16 
##  185.1481  178.2976  300.1924 9178.1713 3292.0170  246.0006 4707.7758  725.0920 
##        17        18        19        20        21        22        23        24 
##  204.5427  656.5210  593.1264  443.5414  222.8670  226.6446  161.8953  121.6825 
##        25        26        27        28        29        30        31        32 
##  344.8523  356.3822  568.5619  261.6412  249.1101  504.1308  276.1942  235.7563 
##        33        34 
##  195.9503  616.0393
plot.prediksi <- data.frame(data.poisson$Penduduk.Miskin, prediksi.poisson, Prediksi.nb2)
colnames(plot.prediksi) <- c("Penduduk_Miskin", "Prediksi_Poisson", "Prediksi_nb2")

plot.prediksi
x <- 1:34

plot(x, plot.prediksi$Penduduk_Miskin,
     type = "o",
     xlab = "Amatan",
     ylab = "Jumlah Penduduk Miskin",
     main = "Plot Prediksi dan Data Asli"
     )

lines (x,plot.prediksi$Prediksi_nb2,
       type="l",
       col = "red")

Interpretasi koefisien regresi

data.frame(model.nb2$coefficients, exp(model.nb2$coefficients))

Jumlah penduduk, exp(5.948951e-05) = 1.0000595 setiap peningkatan 1000 jumlah penduduk, akan meningkatkan jumlah penduduk miskin, 1.0000595 kali lipat

Proposi lapangan kerja informal, exp(5.852904e-02) = 1.06 setiap peningkatan 1% proposi lapangan kerja informal, akan meningkatkan jumlah penduduk miskin 1.06 kali lipat

wilayah tengah 0.6591, peluang daerah yang berada di wilayah tengah untuk mengalami peningkatan penduduk miskin adalah 0.6591 kali dibanding daerah yang berada diwilayah barat.

pada data baru

data.baru <- read_xlsx("Data Poisson.xlsx", sheet="Data Kemiskinan Baru")
data.baru$Wilayah <- as.factor(data.baru$Wilayah)
data.baru
predict(model.nb2, newdata = data.baru, type = "response")
##         1         2         3         4         5         6         7         8 
##  780.7106  481.2674 1054.4532  674.3060  585.8197  365.6862  139.8690  238.2916 
##         9        10 
##  102.6512  433.0340