library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
# Read the indicator workbook into a tibble
data <- read_excel(path = "C:/Users/Raiqa/Downloads/Data Miskin Kak Bintang.xlsx")
# Preview the first rows of the data
head(x = data)
## # A tibble: 6 × 11
##   `Nama Wilayah`      P0 RLS     PPK   IPM   UHH  SNTS   AIR   TPT  TPAK    PDRB
##   <chr>            <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 DKI JAKARTA      NA    <NA>     NA  NA    NA    NA    NA   NA     NA   NA     
## 2 Kepulauan Seribu 15.1  8.81  12587  72.1  69.0  83.8  94.2  8.58  65.5  3.65e6
## 3 Kota Jakarta Se…  3.56 11.64 23888  84.9  74.2  96.5 100.   7.33  61.4  4.29e8
## 4 Kota Jakarta Ti…  4.28 11.67 17733  83.0  74.5  96.7  99.6  8.23  60.9  3.14e8
## 5 Kota Jakarta Pu…  4.94 11.39 17365  81.6  74.2  89.6 100    7.75  63.2  4.60e8
## 6 Kota Jakarta Ba…  4.31 10.78 20801  81.8  73.7  97.7 100    9.06  63.2  3.28e8
# Column types and leading values of the raw import
str(object = data)
## tibble [17 × 11] (S3: tbl_df/tbl/data.frame)
##  $ Nama Wilayah: chr [1:17] "DKI JAKARTA" "Kepulauan Seribu" "Kota Jakarta Selatan" "Kota Jakarta Timur" ...
##  $ P0          : num [1:17] NA 15.06 3.56 4.28 4.94 ...
##  $ RLS         : chr [1:17] NA "8.81" "11.64" "11.67" ...
##  $ PPK         : num [1:17] NA 12587 23888 17733 17365 ...
##  $ IPM         : num [1:17] NA 72.1 84.9 83 81.6 ...
##  $ UHH         : num [1:17] NA 69 74.2 74.5 74.2 ...
##  $ SNTS        : num [1:17] NA 83.8 96.5 96.7 89.6 ...
##  $ AIR         : num [1:17] NA 94.2 100 99.6 100 ...
##  $ TPT         : num [1:17] NA 8.58 7.33 8.23 7.75 ...
##  $ TPAK        : num [1:17] NA 65.5 61.4 60.9 63.2 ...
##  $ PDRB        : num [1:17] NA 3.65e+06 4.29e+08 3.14e+08 4.60e+08 ...
# Per-column summary of the raw import (includes the NA count per numeric column)
summary(object = data)
##  Nama Wilayah             P0             RLS                 PPK       
##  Length:17          Min.   : 2.570   Length:17          Min.   :10410  
##  Class :character   1st Qu.: 4.287   Class :character   1st Qu.:12352  
##  Mode  :character   Median : 5.075   Mode  :character   Median :15586  
##                     Mean   : 5.922                      Mean   :15609  
##                     3rd Qu.: 7.210                      3rd Qu.:17641  
##                     Max.   :15.060                      Max.   :23888  
##                     NA's   :3                           NA's   :3      
##       IPM             UHH             SNTS            AIR        
##  Min.   :70.60   Min.   :68.99   Min.   :63.91   Min.   : 91.83  
##  1st Qu.:74.98   1st Qu.:71.82   1st Qu.:84.29   1st Qu.: 97.98  
##  Median :80.94   Median :73.75   Median :92.78   Median : 99.37  
##  Mean   :78.65   Mean   :72.98   Mean   :89.25   Mean   : 98.10  
##  3rd Qu.:81.72   3rd Qu.:74.20   3rd Qu.:96.98   3rd Qu.: 99.99  
##  Max.   :84.90   Max.   :75.19   Max.   :98.84   Max.   :100.00  
##  NA's   :3       NA's   :3       NA's   :3       NA's   :3       
##       TPT              TPAK            PDRB          
##  Min.   : 7.332   Min.   :60.85   Min.   :  3649179  
##  1st Qu.: 8.587   1st Qu.:62.56   1st Qu.: 64222352  
##  Median : 9.065   Median :63.19   Median :133143936  
##  Mean   : 9.447   Mean   :63.46   Mean   :192635338  
##  3rd Qu.:10.029   3rd Qu.:64.70   3rd Qu.:324177110  
##  Max.   :12.224   Max.   :65.87   Max.   :460081046  
##  NA's   :3        NA's   :3       NA's   :3
# Build the analysis table: keep only rows that carry indicator values and drop
# the first column (the region name).
# Rows without values (e.g. the "DKI JAKARTA" row, which is entirely NA) are
# detected through a missing P0 instead of hard-coded row positions
# (previously rows 1, 8 and 14), so the filter stays correct if rows are added
# to or reordered in the spreadsheet.
data_gabungan <- data[!is.na(data$P0), -1]

# Preview the filtered table
head(data_gabungan)
## # A tibble: 6 × 10
##      P0 RLS     PPK   IPM   UHH  SNTS   AIR   TPT  TPAK       PDRB
##   <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>      <dbl>
## 1 15.1  8.81  12587  72.1  69.0  83.8  94.2  8.58  65.5   3649179.
## 2  3.56 11.64 23888  84.9  74.2  96.5 100.   7.33  61.4 429398830.
## 3  4.28 11.67 17733  83.0  74.5  96.7  99.6  8.23  60.9 313842135.
## 4  4.94 11.39 17365  81.6  74.2  89.6 100    7.75  63.2 460081046.
## 5  4.31 10.78 20801  81.8  73.7  97.7 100    9.06  63.2 327622101.
## 6  7.24 10.81 18762  80.5  73.4  90.6 100    9.84  65.8 330894597.
# Column types of the filtered table
str(object = data_gabungan)
## tibble [14 × 10] (S3: tbl_df/tbl/data.frame)
##  $ P0  : num [1:14] 15.06 3.56 4.28 4.94 4.31 ...
##  $ RLS : chr [1:14] "8.81" "11.64" "11.67" "11.39" ...
##  $ PPK : num [1:14] 12587 23888 17733 17365 20801 ...
##  $ IPM : num [1:14] 72.1 84.9 83 81.6 81.8 ...
##  $ UHH : num [1:14] 69 74.2 74.5 74.2 73.7 ...
##  $ SNTS: num [1:14] 83.8 96.5 96.7 89.6 97.7 ...
##  $ AIR : num [1:14] 94.2 100 99.6 100 100 ...
##  $ TPT : num [1:14] 8.58 7.33 8.23 7.75 9.06 ...
##  $ TPAK: num [1:14] 65.5 61.4 60.9 63.2 63.2 ...
##  $ PDRB: num [1:14] 3.65e+06 4.29e+08 3.14e+08 4.60e+08 3.28e+08 ...
# Per-column summary of the filtered table
summary(object = data_gabungan)
##        P0             RLS                 PPK             IPM       
##  Min.   : 2.570   Length:14          Min.   :10410   Min.   :70.60  
##  1st Qu.: 4.287   Class :character   1st Qu.:12352   1st Qu.:74.98  
##  Median : 5.075   Mode  :character   Median :15586   Median :80.94  
##  Mean   : 5.922                      Mean   :15609   Mean   :78.65  
##  3rd Qu.: 7.210                      3rd Qu.:17641   3rd Qu.:81.72  
##  Max.   :15.060                      Max.   :23888   Max.   :84.90  
##       UHH             SNTS            AIR              TPT        
##  Min.   :68.99   Min.   :63.91   Min.   : 91.83   Min.   : 7.332  
##  1st Qu.:71.82   1st Qu.:84.29   1st Qu.: 97.98   1st Qu.: 8.587  
##  Median :73.75   Median :92.78   Median : 99.37   Median : 9.065  
##  Mean   :72.98   Mean   :89.25   Mean   : 98.10   Mean   : 9.447  
##  3rd Qu.:74.20   3rd Qu.:96.98   3rd Qu.: 99.99   3rd Qu.:10.029  
##  Max.   :75.19   Max.   :98.84   Max.   :100.00   Max.   :12.224  
##       TPAK            PDRB          
##  Min.   :60.85   Min.   :  3649179  
##  1st Qu.:62.56   1st Qu.: 64222352  
##  Median :63.19   Median :133143936  
##  Mean   :63.46   Mean   :192635338  
##  3rd Qu.:64.70   3rd Qu.:324177110  
##  Max.   :65.87   Max.   :460081046
# Rename the columns positionally to modelling names:
#   y = P0, x1 = RLS, x2 = PPK, x3 = IPM, x4 = UHH,
#   x5 = SNTS, x6 = AIR, x7 = TPT, x8 = TPAK, x9 = PDRB
names(data_gabungan) <- c(
  "y",
  "x1", "x2", "x3",
  "x4", "x5", "x6",
  "x7", "x8", "x9"
)

# Show the full renamed table
print(x = data_gabungan)
## # A tibble: 14 × 10
##        y x1                    x2    x3    x4    x5    x6    x7    x8         x9
##    <dbl> <chr>              <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>      <dbl>
##  1 15.1  8.81               12587  72.1  69.0  83.8  94.2  8.58  65.5   3649179.
##  2  3.56 11.64              23888  84.9  74.2  96.5 100.   7.33  61.4 429398830.
##  3  4.28 11.67              17733  83.0  74.5  96.7  99.6  8.23  60.9 313842135.
##  4  4.94 11.39              17365  81.6  74.2  89.6 100    7.75  63.2 460081046.
##  5  4.31 10.78              20801  81.8  73.7  97.7 100    9.06  63.2 327622101.
##  6  7.24 10.81              18762  80.5  73.4  90.6 100    9.84  65.8 330894597.
##  7  8.13 8.31               10410  70.6  71.4  63.9  91.8 12.2   62.6 159582645.
##  8  5.21 9.3000000000000007 11341  74.4  73.8  81.0  95.0 10.1   65.9 251828575.
##  9  7.24 10.53              11716  76.6  73.8  75.3  98.1 11.8   61.8  33372476.
## 10  4.74 11.31              15903  82.0  75.2  97.5 100   10.9   64.8  69796936.
## 11  2.58 11.46              15420  81.4  74.6  97.1  99.5  9.76  62.6  49946927.
## 12  7.12 8.61               12273  72.3  69.9  85.9  98.0  9.06  63.9  97809902.
## 13  5.93 10.83              14575  78.5  71.6  94.9  98.0  9.07  64.5 106705227.
## 14  2.57 11.82              15751  81.6  72.5  98.8  99.2  8.60  62.6  62364158.

EKSPLORASI DATA

#install.packages("GGally")
#install.packages("corrplot")
#install.packages("moments")
library(moments)
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(GGally)
## Warning: package 'GGally' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(ggplot2)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
# Coerce text columns to numeric so every variable can be plotted and modelled.
# Only character columns are converted (x1 is the one read from Excel as text).
# Columns that are already numeric are left untouched: sending a double through
# as.character() and back is redundant and can drop digits beyond the
# 15 significant digits that as.character() keeps.
data_gabungan <- mutate(data_gabungan, across(where(is.character), as.numeric))
# Histogram (20 bins) of every variable. Each row of the spec table gives the
# column to plot, its fill and outline colours, the plot title and the x label.
hist_specs <- data.frame(
  var = c("y", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"),
  fill = c(
    "palegreen3", "snow3", "lemonchiffon3", "bisque2", "lightsteelblue2",
    "lightpink", "paleturquoise1", "peachpuff1", "seagreen2", "violetred1"
  ),
  outline = c(
    "palegreen4", "snow4", "lemonchiffon4", "bisque3", "lightsteelblue3",
    "lightpink3", "paleturquoise2", "peachpuff3", "seagreen4", "violetred4"
  ),
  title = c(
    "Sebaran Persentase Penduduk Miskin", "Sebaran x1", "Sebaran x2",
    "Sebaran x3", "Sebaran x4", "Sebaran x5", "Sebaran x6", "Sebaran x7",
    "Sebaran x8", "Sebaran x9"
  ),
  xlab = c("y", "X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9")
)

for (i in seq_len(nrow(hist_specs))) {
  spec <- hist_specs[i, ]
  # print() is needed because ggplot objects are not auto-printed inside a loop
  print(
    ggplot(data_gabungan, aes(x = .data[[spec$var]])) +
      geom_histogram(fill = spec$fill, bins = 20, color = spec$outline) +
      labs(title = spec$title, x = spec$xlab, y = "frekuensi")
  )
}

# Boxplot of the response y (percentage of poor population, per the plot title).
# The x-axis label is "y": it previously read "X1", a copy-paste slip that
# mislabelled the variable actually drawn on this plot.
ggplot(data_gabungan, aes(x = y)) +
  geom_boxplot(fill = "palegreen3", color = "palegreen4") +
  labs(title = "Sebaran Persentase Penduduk Miskin", x = "y", y = "frekuensi")

# Boxplot of every predictor x1..x9. Each row of the spec table gives the
# column to plot, its fill and outline colours, the plot title and the x label.
box_specs <- data.frame(
  var = c("x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9"),
  fill = c(
    "snow3", "lemonchiffon3", "bisque2", "lightsteelblue2", "lightpink",
    "paleturquoise2", "peachpuff1", "seagreen2", "violetred1"
  ),
  outline = c(
    "snow4", "lemonchiffon4", "bisque3", "lightsteelblue3", "lightpink3",
    "paleturquoise4", "peachpuff3", "seagreen4", "violetred4"
  ),
  title = c(
    "Sebaran x1", "Sebaran x2", "Sebaran x3", "Sebaran x4", "Sebaran x5",
    "Sebaran x6", "Sebaran x7", "Sebaran x8", "Sebaran x9"
  ),
  xlab = c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9")
)

for (i in seq_len(nrow(box_specs))) {
  spec <- box_specs[i, ]
  # print() is needed because ggplot objects are not auto-printed inside a loop
  print(
    ggplot(data_gabungan, aes(x = .data[[spec$var]])) +
      geom_boxplot(fill = spec$fill, color = spec$outline) +
      labs(title = spec$title, x = spec$xlab, y = "frekuensi")
  )
}

# Scatterplot matrix of the response and all nine predictors
pairs(
  x = data_gabungan[c("y", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9")],
  bg = "hotpink1",
  pch = 21
)

# Correlation matrix of all numeric columns.
# vapply() (rather than sapply()) always returns a logical vector, so the
# column mask keeps its type even for an empty table.
numeric_data <- data_gabungan[, vapply(data_gabungan, is.numeric, logical(1))]
# "complete.obs" drops every row that contains a missing value before correlating
cor_matrix <- cor(numeric_data, use = "complete.obs")
print(cor_matrix)
##             y         x1         x2         x3          x4          x5
## y   1.0000000 -0.7078855 -0.4681898 -0.7144467 -0.74495303 -0.54936803
## x1 -0.7078855  1.0000000  0.7035595  0.9622028  0.73591037  0.78419435
## x2 -0.4681898  0.7035595  1.0000000  0.8525791  0.49037144  0.71347350
## x3 -0.7144467  0.9622028  0.8525791  1.0000000  0.75849930  0.82238737
## x4 -0.7449530  0.7359104  0.4903714  0.7584993  1.00000000  0.40319651
## x5 -0.5493680  0.7841943  0.7134735  0.8223874  0.40319651  1.00000000
## x6 -0.6321075  0.8495864  0.7364240  0.8758912  0.61483507  0.82493126
## x7  0.1751585 -0.4231782 -0.6328588 -0.4952487  0.01765293 -0.65616164
## x8  0.4483761 -0.4138360 -0.2924008 -0.3835278 -0.33274532 -0.07992076
## x9 -0.3393676  0.3169138  0.6789079  0.4808685  0.43626199  0.19659241
##            x6          x7          x8         x9
## y  -0.6321075  0.17515845  0.44837615 -0.3393676
## x1  0.8495864 -0.42317819 -0.41383603  0.3169138
## x2  0.7364240 -0.63285884 -0.29240081  0.6789079
## x3  0.8758912 -0.49524873 -0.38352775  0.4808685
## x4  0.6148351  0.01765293 -0.33274532  0.4362620
## x5  0.8249313 -0.65616164 -0.07992076  0.1965924
## x6  1.0000000 -0.46897390 -0.24859308  0.3510106
## x7 -0.4689739  1.00000000  0.13376674 -0.4851016
## x8 -0.2485931  0.13376674  1.00000000 -0.1752825
## x9  0.3510106 -0.48510157 -0.17528253  1.0000000
# Correlation heat map: coloured tiles, upper triangle only, with the
# coefficients overlaid in black at 70% text size and black variable labels.
corrplot(
  corr = cor_matrix,
  method = "color",
  type = "upper",
  addCoef.col = "black",
  number.cex = 0.7,
  tl.col = "black"
)

PEMODELAN

Tanpa Kepulauan Seribu

# Show the full table before removing any row
print(x = data_gabungan)
## # A tibble: 14 × 10
##        y    x1    x2    x3    x4    x5    x6    x7    x8         x9
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>      <dbl>
##  1 15.1   8.81 12587  72.1  69.0  83.8  94.2  8.58  65.5   3649179.
##  2  3.56 11.6  23888  84.9  74.2  96.5 100.   7.33  61.4 429398830.
##  3  4.28 11.7  17733  83.0  74.5  96.7  99.6  8.23  60.9 313842135.
##  4  4.94 11.4  17365  81.6  74.2  89.6 100    7.75  63.2 460081046.
##  5  4.31 10.8  20801  81.8  73.7  97.7 100    9.06  63.2 327622101.
##  6  7.24 10.8  18762  80.5  73.4  90.6 100    9.84  65.8 330894597.
##  7  8.13  8.31 10410  70.6  71.4  63.9  91.8 12.2   62.6 159582645.
##  8  5.21  9.3  11341  74.4  73.8  81.0  95.0 10.1   65.9 251828575.
##  9  7.24 10.5  11716  76.6  73.8  75.3  98.1 11.8   61.8  33372476.
## 10  4.74 11.3  15903  82.0  75.2  97.5 100   10.9   64.8  69796936.
## 11  2.58 11.5  15420  81.4  74.6  97.1  99.5  9.76  62.6  49946927.
## 12  7.12  8.61 12273  72.3  69.9  85.9  98.0  9.06  63.9  97809902.
## 13  5.93 10.8  14575  78.5  71.6  94.9  98.0  9.07  64.5 106705227.
## 14  2.57 11.8  15751  81.6  72.5  98.8  99.2  8.60  62.6  62364158.
# Modelling data without Kepulauan Seribu: the first row of data_gabungan is
# removed by position (the region-name column is no longer in the table).
data_NoKepulauanSeribu <- data_gabungan[-1L, ]
print(x = data_NoKepulauanSeribu)
## # A tibble: 13 × 10
##        y    x1    x2    x3    x4    x5    x6    x7    x8         x9
##    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>      <dbl>
##  1  3.56 11.6  23888  84.9  74.2  96.5 100.   7.33  61.4 429398830.
##  2  4.28 11.7  17733  83.0  74.5  96.7  99.6  8.23  60.9 313842135.
##  3  4.94 11.4  17365  81.6  74.2  89.6 100    7.75  63.2 460081046.
##  4  4.31 10.8  20801  81.8  73.7  97.7 100    9.06  63.2 327622101.
##  5  7.24 10.8  18762  80.5  73.4  90.6 100    9.84  65.8 330894597.
##  6  8.13  8.31 10410  70.6  71.4  63.9  91.8 12.2   62.6 159582645.
##  7  5.21  9.3  11341  74.4  73.8  81.0  95.0 10.1   65.9 251828575.
##  8  7.24 10.5  11716  76.6  73.8  75.3  98.1 11.8   61.8  33372476.
##  9  4.74 11.3  15903  82.0  75.2  97.5 100   10.9   64.8  69796936.
## 10  2.58 11.5  15420  81.4  74.6  97.1  99.5  9.76  62.6  49946927.
## 11  7.12  8.61 12273  72.3  69.9  85.9  98.0  9.06  63.9  97809902.
## 12  5.93 10.8  14575  78.5  71.6  94.9  98.0  9.07  64.5 106705227.
## 13  2.57 11.8  15751  81.6  72.5  98.8  99.2  8.60  62.6  62364158.
# Multiple linear regression of y on all nine predictors, fitted on the data
# without Kepulauan Seribu; then report coefficients and fit statistics.
modelRLB <- lm(
  formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9,
  data = data_NoKepulauanSeribu
)
summary(object = modelRLB)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9, 
##     data = data_NoKepulauanSeribu)
## 
## Residuals:
##        1        2        3        4        5        6        7        8 
##  0.39682  0.71901 -0.55507 -0.56121  0.08233 -0.27683  0.03260  0.33861 
##        9       10       11       12       13 
##  0.11955 -0.36945  0.02258  1.02239 -0.97133 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.376e+01  6.718e+01  -0.205    0.851
## x1          -1.747e+00  4.460e+00  -0.392    0.721
## x2          -4.974e-04  8.374e-04  -0.594    0.594
## x3           9.628e-01  2.287e+00   0.421    0.702
## x4          -1.264e+00  9.866e-01  -1.282    0.290
## x5          -1.332e-01  2.308e-01  -0.577    0.604
## x6           5.580e-01  3.014e-01   1.851    0.161
## x7           1.079e+00  8.736e-01   1.235    0.305
## x8           1.094e-01  3.713e-01   0.295    0.788
## x9           8.539e-09  8.528e-09   1.001    0.390
## 
## Residual standard error: 1.101 on 3 degrees of freedom
## Multiple R-squared:  0.908,  Adjusted R-squared:  0.6318 
## F-statistic: 3.288 on 9 and 3 DF,  p-value: 0.1781

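- R-squared 0.908 artinya model dapat menjelaskan 90.8% variasi dalam y menggunakan x1 sampai x9. Sementara itu, Adjusted R-squared hanya 0.6318: setelah dikoreksi terhadap banyaknya prediktor, proporsi variasi yang dijelaskan turun cukup jauh. Selisih yang besar ini menunjukkan bahwa sebagian prediktor tidak menambah informasi yang berarti dan jumlah data (13 amatan untuk 9 prediktor) terlalu sedikit.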

- Secara keseluruhan, model tidak signifikan pada taraf nyata 5% (p-value uji F = 0.1781 > 0.05), sehingga gabungan x1 sampai x9 belum cukup kuat menjelaskan y secara meyakinkan.

- Residual standard error sebesar 1.101, artinya nilai dugaan model rata-rata menyimpang sekitar 1.101 satuan y dari nilai sebenarnya. Derajat bebas (degrees of freedom, df) galat hanya tersisa 3, yang sangat rendah: jumlah data terlalu sedikit untuk model dengan 9 prediktor sehingga model sangat rawan overfitting.

UJI ASUMSI

# Residual diagnostics from plot.lm, drawn in the original order:
#   which = 2 : Normal Q-Q plot (normality of residuals)
#   which = 3 : Scale-Location plot (homoscedasticity)
#   which = 1 : Residuals vs Fitted plot (used in these notes as the
#               residual-independence check)
for (panel in c(2, 3, 1)) {
  plot(modelRLB, which = panel)
}

# Multicollinearity: variance inflation factor of each predictor
vif(modelRLB)
##         x1         x2         x3         x4         x5         x6         x7 
## 271.993515 107.069172 993.100102  22.350514  59.277300   5.324123  16.264828 
##         x8         x9 
##   3.434989  16.535070