library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
# Import the source workbook into a tibble.
# NOTE(review): this is an absolute, user-specific Windows path; the script
# only runs on a machine where exactly this file exists.
data <- read_excel(path = "C:/Users/Raiqa/Downloads/Data Miskin Kak Bintang.xlsx")
# Preview the first rows of the imported data
head(x = data)
## # A tibble: 6 × 11
## `Nama Wilayah` P0 RLS PPK IPM UHH SNTS AIR TPT TPAK PDRB
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 DKI JAKARTA NA <NA> NA NA NA NA NA NA NA NA
## 2 Kepulauan Seribu 15.1 8.81 12587 72.1 69.0 83.8 94.2 8.58 65.5 3.65e6
## 3 Kota Jakarta Se… 3.56 11.64 23888 84.9 74.2 96.5 100. 7.33 61.4 4.29e8
## 4 Kota Jakarta Ti… 4.28 11.67 17733 83.0 74.5 96.7 99.6 8.23 60.9 3.14e8
## 5 Kota Jakarta Pu… 4.94 11.39 17365 81.6 74.2 89.6 100 7.75 63.2 4.60e8
## 6 Kota Jakarta Ba… 4.31 10.78 20801 81.8 73.7 97.7 100 9.06 63.2 3.28e8
# Inspect the column types of the raw import. RLS is read as character
# rather than numeric, so it has to be converted before any modelling.
str(data)
## tibble [17 × 11] (S3: tbl_df/tbl/data.frame)
## $ Nama Wilayah: chr [1:17] "DKI JAKARTA" "Kepulauan Seribu" "Kota Jakarta Selatan" "Kota Jakarta Timur" ...
## $ P0 : num [1:17] NA 15.06 3.56 4.28 4.94 ...
## $ RLS : chr [1:17] NA "8.81" "11.64" "11.67" ...
## $ PPK : num [1:17] NA 12587 23888 17733 17365 ...
## $ IPM : num [1:17] NA 72.1 84.9 83 81.6 ...
## $ UHH : num [1:17] NA 69 74.2 74.5 74.2 ...
## $ SNTS : num [1:17] NA 83.8 96.5 96.7 89.6 ...
## $ AIR : num [1:17] NA 94.2 100 99.6 100 ...
## $ TPT : num [1:17] NA 8.58 7.33 8.23 7.75 ...
## $ TPAK : num [1:17] NA 65.5 61.4 60.9 63.2 ...
## $ PDRB : num [1:17] NA 3.65e+06 4.29e+08 3.14e+08 4.60e+08 ...
# Per-column summaries of the raw import; every numeric column reports
# 3 missing values.
summary(data)
## Nama Wilayah P0 RLS PPK
## Length:17 Min. : 2.570 Length:17 Min. :10410
## Class :character 1st Qu.: 4.287 Class :character 1st Qu.:12352
## Mode :character Median : 5.075 Mode :character Median :15586
## Mean : 5.922 Mean :15609
## 3rd Qu.: 7.210 3rd Qu.:17641
## Max. :15.060 Max. :23888
## NA's :3 NA's :3
## IPM UHH SNTS AIR
## Min. :70.60 Min. :68.99 Min. :63.91 Min. : 91.83
## 1st Qu.:74.98 1st Qu.:71.82 1st Qu.:84.29 1st Qu.: 97.98
## Median :80.94 Median :73.75 Median :92.78 Median : 99.37
## Mean :78.65 Mean :72.98 Mean :89.25 Mean : 98.10
## 3rd Qu.:81.72 3rd Qu.:74.20 3rd Qu.:96.98 3rd Qu.: 99.99
## Max. :84.90 Max. :75.19 Max. :98.84 Max. :100.00
## NA's :3 NA's :3 NA's :3 NA's :3
## TPT TPAK PDRB
## Min. : 7.332 Min. :60.85 Min. : 3649179
## 1st Qu.: 8.587 1st Qu.:62.56 1st Qu.: 64222352
## Median : 9.065 Median :63.19 Median :133143936
## Mean : 9.447 Mean :63.46 Mean :192635338
## 3rd Qu.:10.029 3rd Qu.:64.70 3rd Qu.:324177110
## Max. :12.224 Max. :65.87 Max. :460081046
## NA's :3 NA's :3 NA's :3
# Keep only the rows that carry measurements and drop the first column
# (the region name), leaving the response and the nine predictors.
# Rows are selected by missingness in P0 rather than by hard-coded positions
# (previously 1, 8 and 14), so the filter keeps working if the sheet is
# reordered or extended. On the current file the result is unchanged: the raw
# data has exactly 3 NA's in P0 and none remain once those rows are removed.
data_gabungan <- data[!is.na(data$P0), -1]
# Preview the filtered data
head(data_gabungan)
## # A tibble: 6 × 10
## P0 RLS PPK IPM UHH SNTS AIR TPT TPAK PDRB
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 15.1 8.81 12587 72.1 69.0 83.8 94.2 8.58 65.5 3649179.
## 2 3.56 11.64 23888 84.9 74.2 96.5 100. 7.33 61.4 429398830.
## 3 4.28 11.67 17733 83.0 74.5 96.7 99.6 8.23 60.9 313842135.
## 4 4.94 11.39 17365 81.6 74.2 89.6 100 7.75 63.2 460081046.
## 5 4.31 10.78 20801 81.8 73.7 97.7 100 9.06 63.2 327622101.
## 6 7.24 10.81 18762 80.5 73.4 90.6 100 9.84 65.8 330894597.
# Check the structure after filtering: 14 rows and 10 columns remain, and
# RLS is still stored as character at this point.
str(data_gabungan)
## tibble [14 × 10] (S3: tbl_df/tbl/data.frame)
## $ P0 : num [1:14] 15.06 3.56 4.28 4.94 4.31 ...
## $ RLS : chr [1:14] "8.81" "11.64" "11.67" "11.39" ...
## $ PPK : num [1:14] 12587 23888 17733 17365 20801 ...
## $ IPM : num [1:14] 72.1 84.9 83 81.6 81.8 ...
## $ UHH : num [1:14] 69 74.2 74.5 74.2 73.7 ...
## $ SNTS: num [1:14] 83.8 96.5 96.7 89.6 97.7 ...
## $ AIR : num [1:14] 94.2 100 99.6 100 100 ...
## $ TPT : num [1:14] 8.58 7.33 8.23 7.75 9.06 ...
## $ TPAK: num [1:14] 65.5 61.4 60.9 63.2 63.2 ...
## $ PDRB: num [1:14] 3.65e+06 4.29e+08 3.14e+08 4.60e+08 3.28e+08 ...
# Summaries after filtering; the numeric columns no longer report any NA's.
summary(data_gabungan)
## P0 RLS PPK IPM
## Min. : 2.570 Length:14 Min. :10410 Min. :70.60
## 1st Qu.: 4.287 Class :character 1st Qu.:12352 1st Qu.:74.98
## Median : 5.075 Mode :character Median :15586 Median :80.94
## Mean : 5.922 Mean :15609 Mean :78.65
## 3rd Qu.: 7.210 3rd Qu.:17641 3rd Qu.:81.72
## Max. :15.060 Max. :23888 Max. :84.90
## UHH SNTS AIR TPT
## Min. :68.99 Min. :63.91 Min. : 91.83 Min. : 7.332
## 1st Qu.:71.82 1st Qu.:84.29 1st Qu.: 97.98 1st Qu.: 8.587
## Median :73.75 Median :92.78 Median : 99.37 Median : 9.065
## Mean :72.98 Mean :89.25 Mean : 98.10 Mean : 9.447
## 3rd Qu.:74.20 3rd Qu.:96.98 3rd Qu.: 99.99 3rd Qu.:10.029
## Max. :75.19 Max. :98.84 Max. :100.00 Max. :12.224
## TPAK PDRB
## Min. :60.85 Min. : 3649179
## 1st Qu.:62.56 1st Qu.: 64222352
## Median :63.19 Median :133143936
## Mean :63.46 Mean :192635338
## 3rd Qu.:64.70 3rd Qu.:324177110
## Max. :65.87 Max. :460081046
# Rename the columns positionally to short regression names:
#   y  = P0,   x1 = RLS, x2 = PPK, x3 = IPM,  x4 = UHH,
#   x5 = SNTS, x6 = AIR, x7 = TPT, x8 = TPAK, x9 = PDRB
names(data_gabungan) <- c("y", paste0("x", 1:9))
# Show the full renamed table
print(data_gabungan)
## # A tibble: 14 × 10
## y x1 x2 x3 x4 x5 x6 x7 x8 x9
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 15.1 8.81 12587 72.1 69.0 83.8 94.2 8.58 65.5 3649179.
## 2 3.56 11.64 23888 84.9 74.2 96.5 100. 7.33 61.4 429398830.
## 3 4.28 11.67 17733 83.0 74.5 96.7 99.6 8.23 60.9 313842135.
## 4 4.94 11.39 17365 81.6 74.2 89.6 100 7.75 63.2 460081046.
## 5 4.31 10.78 20801 81.8 73.7 97.7 100 9.06 63.2 327622101.
## 6 7.24 10.81 18762 80.5 73.4 90.6 100 9.84 65.8 330894597.
## 7 8.13 8.31 10410 70.6 71.4 63.9 91.8 12.2 62.6 159582645.
## 8 5.21 9.3000000000000007 11341 74.4 73.8 81.0 95.0 10.1 65.9 251828575.
## 9 7.24 10.53 11716 76.6 73.8 75.3 98.1 11.8 61.8 33372476.
## 10 4.74 11.31 15903 82.0 75.2 97.5 100 10.9 64.8 69796936.
## 11 2.58 11.46 15420 81.4 74.6 97.1 99.5 9.76 62.6 49946927.
## 12 7.12 8.61 12273 72.3 69.9 85.9 98.0 9.06 63.9 97809902.
## 13 5.93 10.83 14575 78.5 71.6 94.9 98.0 9.07 64.5 106705227.
## 14 2.57 11.82 15751 81.6 72.5 98.8 99.2 8.60 62.6 62364158.
#install.packages("GGally")
#install.packages("corrplot")
#install.packages("moments")
library(moments)
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(GGally)
## Warning: package 'GGally' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggplot2)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Make every column numeric.
# Only columns that are not already numeric are converted (x1 is the one read
# as character). The previous code pushed every column, including those that
# were already double, through as.character(), which keeps at most 15
# significant digits and could therefore silently alter stored values.
# as.character() is kept for the converted columns so that a factor column
# would be converted by its labels rather than by its internal codes.
data_gabungan[] <- lapply(data_gabungan, function(column) {
  if (is.numeric(column)) {
    column
  } else {
    as.numeric(as.character(column))
  }
})
# Histograms (20 bins each) of the response and of every predictor, one plot
# per variable, each with its own fill/outline colour pair.

# Response y
ggplot(data = data_gabungan, mapping = aes(x = y)) +
  geom_histogram(bins = 20, color = "palegreen4", fill = "palegreen3") +
  labs(x = "y", y = "frekuensi", title = "Sebaran Persentase Penduduk Miskin")

# Predictor x1
ggplot(data = data_gabungan, mapping = aes(x = x1)) +
  geom_histogram(bins = 20, color = "snow4", fill = "snow3") +
  labs(x = "X1", y = "frekuensi", title = "Sebaran x1")

# Predictor x2
ggplot(data = data_gabungan, mapping = aes(x = x2)) +
  geom_histogram(bins = 20, color = "lemonchiffon4", fill = "lemonchiffon3") +
  labs(x = "X2", y = "frekuensi", title = "Sebaran x2")

# Predictor x3
ggplot(data = data_gabungan, mapping = aes(x = x3)) +
  geom_histogram(bins = 20, color = "bisque3", fill = "bisque2") +
  labs(x = "X3", y = "frekuensi", title = "Sebaran x3")

# Predictor x4
ggplot(data = data_gabungan, mapping = aes(x = x4)) +
  geom_histogram(bins = 20, color = "lightsteelblue3", fill = "lightsteelblue2") +
  labs(x = "X4", y = "frekuensi", title = "Sebaran x4")

# Predictor x5
ggplot(data = data_gabungan, mapping = aes(x = x5)) +
  geom_histogram(bins = 20, color = "lightpink3", fill = "lightpink") +
  labs(x = "X5", y = "frekuensi", title = "Sebaran x5")

# Predictor x6
ggplot(data = data_gabungan, mapping = aes(x = x6)) +
  geom_histogram(bins = 20, color = "paleturquoise2", fill = "paleturquoise1") +
  labs(x = "X6", y = "frekuensi", title = "Sebaran x6")

# Predictor x7
ggplot(data = data_gabungan, mapping = aes(x = x7)) +
  geom_histogram(bins = 20, color = "peachpuff3", fill = "peachpuff1") +
  labs(x = "X7", y = "frekuensi", title = "Sebaran x7")

# Predictor x8
ggplot(data = data_gabungan, mapping = aes(x = x8)) +
  geom_histogram(bins = 20, color = "seagreen4", fill = "seagreen2") +
  labs(x = "X8", y = "frekuensi", title = "Sebaran x8")

# Predictor x9
ggplot(data = data_gabungan, mapping = aes(x = x9)) +
  geom_histogram(bins = 20, color = "violetred4", fill = "violetred1") +
  labs(x = "X9", y = "frekuensi", title = "Sebaran x9")
# Boxplot of the response y.
# The x-axis label previously read "X1" (copied from the x1 plot) although the
# plotted variable is y; it now matches the variable, as in the y histogram.
# NOTE(review): the y-axis label "frekuensi" is kept as-is, but a boxplot has
# no frequency axis -- consider removing that label.
ggplot(data_gabungan, aes(x = y)) +
  geom_boxplot(fill = "palegreen3", color = "palegreen4") +
  labs(title = "Sebaran Persentase Penduduk Miskin", x = "y", y = "frekuensi")
# Boxplots of the nine predictors, one plot per variable, each with its own
# fill/outline colour pair.

# Predictor x1
ggplot(data = data_gabungan, mapping = aes(x = x1)) +
  geom_boxplot(color = "snow4", fill = "snow3") +
  labs(x = "X1", y = "frekuensi", title = "Sebaran x1")

# Predictor x2
ggplot(data = data_gabungan, mapping = aes(x = x2)) +
  geom_boxplot(color = "lemonchiffon4", fill = "lemonchiffon3") +
  labs(x = "X2", y = "frekuensi", title = "Sebaran x2")

# Predictor x3
ggplot(data = data_gabungan, mapping = aes(x = x3)) +
  geom_boxplot(color = "bisque3", fill = "bisque2") +
  labs(x = "X3", y = "frekuensi", title = "Sebaran x3")

# Predictor x4
ggplot(data = data_gabungan, mapping = aes(x = x4)) +
  geom_boxplot(color = "lightsteelblue3", fill = "lightsteelblue2") +
  labs(x = "X4", y = "frekuensi", title = "Sebaran x4")

# Predictor x5
ggplot(data = data_gabungan, mapping = aes(x = x5)) +
  geom_boxplot(color = "lightpink3", fill = "lightpink") +
  labs(x = "X5", y = "frekuensi", title = "Sebaran x5")

# Predictor x6
ggplot(data = data_gabungan, mapping = aes(x = x6)) +
  geom_boxplot(color = "paleturquoise4", fill = "paleturquoise2") +
  labs(x = "X6", y = "frekuensi", title = "Sebaran x6")

# Predictor x7
ggplot(data = data_gabungan, mapping = aes(x = x7)) +
  geom_boxplot(color = "peachpuff3", fill = "peachpuff1") +
  labs(x = "X7", y = "frekuensi", title = "Sebaran x7")

# Predictor x8
ggplot(data = data_gabungan, mapping = aes(x = x8)) +
  geom_boxplot(color = "seagreen4", fill = "seagreen2") +
  labs(x = "X8", y = "frekuensi", title = "Sebaran x8")

# Predictor x9
ggplot(data = data_gabungan, mapping = aes(x = x9)) +
  geom_boxplot(color = "violetred4", fill = "violetred1") +
  labs(x = "X9", y = "frekuensi", title = "Sebaran x9")
# Scatterplot matrix of the response y and the nine predictors x1..x9,
# drawn with filled circle markers.
pairs(
  data_gabungan[, c("y", paste0("x", 1:9))],
  bg = "hotpink1",
  pch = 21
)
# Pairwise correlations between all numeric columns.
# vapply() with a logical(1) template replaces sapply() so the column mask is
# always a logical vector, whatever the shape of the input.
numeric_data <- data_gabungan[, vapply(data_gabungan, is.numeric, logical(1))]
# "complete.obs" drops any row that has a missing value before correlating.
cor_matrix <- cor(numeric_data, use = "complete.obs")
print(cor_matrix)
## y x1 x2 x3 x4 x5
## y 1.0000000 -0.7078855 -0.4681898 -0.7144467 -0.74495303 -0.54936803
## x1 -0.7078855 1.0000000 0.7035595 0.9622028 0.73591037 0.78419435
## x2 -0.4681898 0.7035595 1.0000000 0.8525791 0.49037144 0.71347350
## x3 -0.7144467 0.9622028 0.8525791 1.0000000 0.75849930 0.82238737
## x4 -0.7449530 0.7359104 0.4903714 0.7584993 1.00000000 0.40319651
## x5 -0.5493680 0.7841943 0.7134735 0.8223874 0.40319651 1.00000000
## x6 -0.6321075 0.8495864 0.7364240 0.8758912 0.61483507 0.82493126
## x7 0.1751585 -0.4231782 -0.6328588 -0.4952487 0.01765293 -0.65616164
## x8 0.4483761 -0.4138360 -0.2924008 -0.3835278 -0.33274532 -0.07992076
## x9 -0.3393676 0.3169138 0.6789079 0.4808685 0.43626199 0.19659241
## x6 x7 x8 x9
## y -0.6321075 0.17515845 0.44837615 -0.3393676
## x1 0.8495864 -0.42317819 -0.41383603 0.3169138
## x2 0.7364240 -0.63285884 -0.29240081 0.6789079
## x3 0.8758912 -0.49524873 -0.38352775 0.4808685
## x4 0.6148351 0.01765293 -0.33274532 0.4362620
## x5 0.8249313 -0.65616164 -0.07992076 0.1965924
## x6 1.0000000 -0.46897390 -0.24859308 0.3510106
## x7 -0.4689739 1.00000000 0.13376674 -0.4851016
## x8 -0.2485931 0.13376674 1.00000000 -0.1752825
## x9 0.3510106 -0.48510157 -0.17528253 1.0000000
# Colour-coded heat map of the upper triangle of the correlation matrix,
# with each coefficient printed inside its cell.
corrplot(
  corr = cor_matrix,
  type = "upper",
  method = "color",
  addCoef.col = "black",
  number.cex = 0.7,
  tl.col = "black"
)
# Re-print the table after the numeric conversion: x1 is now <dbl>
# (it was <chr> when the table was printed right after renaming).
print(data_gabungan)
## # A tibble: 14 × 10
## y x1 x2 x3 x4 x5 x6 x7 x8 x9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 15.1 8.81 12587 72.1 69.0 83.8 94.2 8.58 65.5 3649179.
## 2 3.56 11.6 23888 84.9 74.2 96.5 100. 7.33 61.4 429398830.
## 3 4.28 11.7 17733 83.0 74.5 96.7 99.6 8.23 60.9 313842135.
## 4 4.94 11.4 17365 81.6 74.2 89.6 100 7.75 63.2 460081046.
## 5 4.31 10.8 20801 81.8 73.7 97.7 100 9.06 63.2 327622101.
## 6 7.24 10.8 18762 80.5 73.4 90.6 100 9.84 65.8 330894597.
## 7 8.13 8.31 10410 70.6 71.4 63.9 91.8 12.2 62.6 159582645.
## 8 5.21 9.3 11341 74.4 73.8 81.0 95.0 10.1 65.9 251828575.
## 9 7.24 10.5 11716 76.6 73.8 75.3 98.1 11.8 61.8 33372476.
## 10 4.74 11.3 15903 82.0 75.2 97.5 100 10.9 64.8 69796936.
## 11 2.58 11.5 15420 81.4 74.6 97.1 99.5 9.76 62.6 49946927.
## 12 7.12 8.61 12273 72.3 69.9 85.9 98.0 9.06 63.9 97809902.
## 13 5.93 10.8 14575 78.5 71.6 94.9 98.0 9.07 64.5 106705227.
## 14 2.57 11.8 15751 81.6 72.5 98.8 99.2 8.60 62.6 62364158.
# Build the modelling data set without the first observation, which is
# Kepulauan Seribu (y = 15.06, the largest value of y in the data).
# NOTE(review): presumably excluded as an outlier -- confirm the rationale.
data_NoKepulauanSeribu <- data_gabungan[-1, ]
# Show the remaining 13 observations
print(data_NoKepulauanSeribu)
## # A tibble: 13 × 10
## y x1 x2 x3 x4 x5 x6 x7 x8 x9
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3.56 11.6 23888 84.9 74.2 96.5 100. 7.33 61.4 429398830.
## 2 4.28 11.7 17733 83.0 74.5 96.7 99.6 8.23 60.9 313842135.
## 3 4.94 11.4 17365 81.6 74.2 89.6 100 7.75 63.2 460081046.
## 4 4.31 10.8 20801 81.8 73.7 97.7 100 9.06 63.2 327622101.
## 5 7.24 10.8 18762 80.5 73.4 90.6 100 9.84 65.8 330894597.
## 6 8.13 8.31 10410 70.6 71.4 63.9 91.8 12.2 62.6 159582645.
## 7 5.21 9.3 11341 74.4 73.8 81.0 95.0 10.1 65.9 251828575.
## 8 7.24 10.5 11716 76.6 73.8 75.3 98.1 11.8 61.8 33372476.
## 9 4.74 11.3 15903 82.0 75.2 97.5 100 10.9 64.8 69796936.
## 10 2.58 11.5 15420 81.4 74.6 97.1 99.5 9.76 62.6 49946927.
## 11 7.12 8.61 12273 72.3 69.9 85.9 98.0 9.06 63.9 97809902.
## 12 5.93 10.8 14575 78.5 71.6 94.9 98.0 9.07 64.5 106705227.
## 13 2.57 11.8 15751 81.6 72.5 98.8 99.2 8.60 62.6 62364158.
# Multiple linear regression of y on all nine predictors, fitted on the data
# without Kepulauan Seribu.
# NOTE(review): 13 observations and 10 estimated coefficients leave only
# 3 residual degrees of freedom, so the individual t-tests and the overall
# F-test have very little power; interpret the summary with caution.
modelRLB <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9, data = data_NoKepulauanSeribu)
# Coefficient table, residual standard error, R-squared and F-test
summary(modelRLB)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9,
## data = data_NoKepulauanSeribu)
##
## Residuals:
## 1 2 3 4 5 6 7 8
## 0.39682 0.71901 -0.55507 -0.56121 0.08233 -0.27683 0.03260 0.33861
## 9 10 11 12 13
## 0.11955 -0.36945 0.02258 1.02239 -0.97133
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.376e+01 6.718e+01 -0.205 0.851
## x1 -1.747e+00 4.460e+00 -0.392 0.721
## x2 -4.974e-04 8.374e-04 -0.594 0.594
## x3 9.628e-01 2.287e+00 0.421 0.702
## x4 -1.264e+00 9.866e-01 -1.282 0.290
## x5 -1.332e-01 2.308e-01 -0.577 0.604
## x6 5.580e-01 3.014e-01 1.851 0.161
## x7 1.079e+00 8.736e-01 1.235 0.305
## x8 1.094e-01 3.713e-01 0.295 0.788
## x9 8.539e-09 8.528e-09 1.001 0.390
##
## Residual standard error: 1.101 on 3 degrees of freedom
## Multiple R-squared: 0.908, Adjusted R-squared: 0.6318
## F-statistic: 3.288 on 9 and 3 DF, p-value: 0.1781
# Normality of residuals (Normal Q-Q plot)
plot(modelRLB, which = 2)
# Homoscedasticity (Scale-Location plot)
plot(modelRLB, which = 3)
# Independence of residuals (Residuals vs Fitted plot)
# NOTE(review): this plot mainly shows non-linearity and unequal variance;
# confirm whether a dedicated independence check is also needed.
plot(modelRLB, which = 1)
# Multicollinearity: variance inflation factor of each predictor
vif(modelRLB)
## x1 x2 x3 x4 x5 x6 x7
## 271.993515 107.069172 993.100102 22.350514 59.277300 5.324123 16.264828
## x8 x9
## 3.434989 16.535070