Eksplorasi Data INDODAPOER
Berikut ini akan dilakukan eksplorasi data Indonesia Database for Policy and Economic Research (INDODAPOER) yang berkaitan dengan ekonomi dan pendidikan.
Library
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)## Warning: package 'ggplot2' was built under R version 4.1.3
library(viridis)## Warning: package 'viridis' was built under R version 4.1.3
## Loading required package: viridisLite
library(hrbrthemes)## Warning: package 'hrbrthemes' was built under R version 4.1.3
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(gridExtra)##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(ggrepel)
library(PerformanceAnalytics)## Warning: package 'PerformanceAnalytics' was built under R version 4.1.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.1.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
Data
# Read the INDODAPOER extract. The absolute path points at the author's
# machine and has to be adapted before the script runs anywhere else.
employment <- read.csv(
  "C:/Users/Asus/Downloads/DataEcoEduInd.csv",
  header = TRUE
)
# Drop columns 1, 2, 3 and 5 by position.
employment <- employment[, -c(1:3, 5)]
# First remaining column is the province name, followed by x1 .. x16.
colnames(employment) <- c("Provinsi", paste0("x", 1:16))
# Keep only rows without missing values.
employment <- na.omit(employment)
employment## Provinsi x1 x2 x3 x4 x5
## 1 Bali 92.98390 62693.61 2490870 501235 158190
## 2 Banten 97.62059 60194.30 5332496 704057 348997
## 3 Bengkulu 97.90984 46317.53 963463 474705 50766
## 4 DI Yogyakarta 94.82706 104552.92 2118392 432057 160636
## 5 DKI Jakarta 99.71981 101267.32 4726779 15762 161229
## 6 Gorontalo 98.62925 32173.73 555533 167142 35103
## 7 Jambi 98.15025 30628.41 1721362 815049 92368
## 8 Jawa Barat 98.48031 56686.69 20779888 2869492 1560645
## 9 Jawa Tengah 93.44631 47979.62 17245548 4204249 1508556
## 10 Jawa Timur 91.84785 52637.33 20449949 6643543 1444376
## 11 Kalimantan Barat 92.57867 35116.83 2346881 1195545 151211
## 12 Kalimantan Selatan 98.41819 40914.86 2021666 683195 92536
## 13 Kalimantan Tengah 99.20855 33262.28 1301002 499137 78437
## 14 Kalimantan Timur 98.95720 54967.07 1618285 347901 84908
## 15 Kalimantan Utara 95.17907 47407.23 323400 85193 17178
## 16 Kepulauan Bangka-Belitung 97.76474 48539.94 701366 217325 35619
## 17 Kepulauan Riau 98.87151 67925.41 901019 72615 85073
## 18 Lampung 96.93179 38558.33 4060377 1731718 250333
## 19 Maluku Utara 98.75914 26670.76 515615 235800 25393
## 20 Maluku 99.21793 30812.15 700143 257643 43255
## 21 Nanggroe Aceh Darussalam 98.03094 29492.09 2203717 865803 166824
## 22 Nusa Tenggara Barat 87.42448 30520.99 2154124 721283 148826
## 23 Nusa Tenggara Timur 91.89945 26508.09 2411533 1319772 106751
## 24 Papua Barat 97.37628 29064.02 417544 140447 24881
## 25 Papua 76.78815 22421.34 1777207 1204116 45325
## 26 Riau 99.20377 41763.26 2915597 1140824 170418
## 27 Sulawesi Barat 92.85590 28630.10 619395 323280 39739
## 28 Sulawesi Selatan 91.80612 36687.36 3774924 1426501 254738
## 29 Sulawesi Tengah 97.87339 28868.38 1451491 639023 80868
## 30 Sulawesi Tenggara 94.46407 32511.62 1207488 427659 77222
## 31 Sulawesi Utara 99.87202 31148.90 1095145 269884 96915
## 32 Sumatera Barat 99.06626 46983.95 2410450 836071 146766
## 33 Sumatera Selatan 98.65654 35435.12 3963870 1844251 202956
## 34 Sumatera Utara 99.06557 45685.35 6728431 2390797 353259
## x6 x7 x8 x9 x10 x11 x12 x13 x14 x15
## 1 11743 122081 364685 6073 426465 820638 79760 583676 34857 171760
## 2 72016 350084 1267797 25039 790717 1341698 432091 974999 498661 661360
## 3 8321 18453 52647 15270 131870 185185 26246 340959 35061 301810
## 4 11311 86308 346919 18772 368735 601093 92561 604525 73350 460100
## 5 39187 499801 616293 26291 1102746 1645704 619766 606980 315919 373120
## 6 1795 9867 62332 15474 112780 111828 39212 157852 23347 198510
## 7 7858 31643 85674 44410 255851 330055 58454 622233 69281 281690
## 8 174940 857553 4349675 122945 3259409 6216561 1368668 4237769 1851759 3615790
## 9 86213 420952 3756317 107647 2071851 4439096 650667 4673832 817942 3897200
## 10 127035 486375 3247537 174812 2638173 4976285 711813 6166112 850474 4332590
## 11 10224 45369 150714 36230 288777 394230 74581 829993 104518 387080
## 12 13024 48542 178084 78053 308764 518502 100966 693141 95278 189030
## 13 5959 21894 71912 81662 217012 276854 48135 399993 54555 136930
## 14 15107 68728 115908 144717 317956 434865 88195 345415 114313 218900
## 15 2677 12007 27530 10752 77706 70137 20220 99513 17797 50350
## 16 3696 14179 49462 95721 109687 159259 16418 176343 26552 76260
## 17 11524 35935 210563 10465 179401 229272 66171 156002 69113 131680
## 18 17472 64327 365766 26416 486621 930246 187478 1482227 172334 1097050
## 19 2354 9072 28365 13471 99016 69187 32957 157582 25831 81460
## 20 5526 9831 59222 10663 138660 121771 53572 209228 54891 320080
## 21 11773 35151 171870 17702 432881 412027 89686 837826 151164 839490
## 22 21051 36120 266677 35210 316849 529158 78950 867585 83257 737460
## 23 11095 32808 216175 29922 338256 252369 104385 1123071 74825 1142170
## 24 2483 9466 27814 6434 102221 74353 29445 120289 28086 214470
## 25 4058 14792 32633 16226 239386 155407 65264 740469 59064 917630
## 26 14009 71370 217092 34958 465044 687709 114173 1042592 195720 500440
## 27 1711 6432 46390 5400 83402 97257 15784 297692 20227 151780
## 28 18803 91470 341716 24283 586847 854478 176088 1348344 213486 792630
## 29 10364 25692 109919 23061 266624 246244 49696 546029 51481 420210
## 30 6626 18894 108336 31387 237013 255582 44769 441020 40724 307100
## 31 6951 33388 99228 29055 212988 254142 92594 294936 80664 193310
## 32 14154 54525 210052 39617 370882 637272 101111 793035 141680 357130
## 33 11453 65154 308661 55283 504690 781037 190385 1463283 176260 1068270
## 34 47001 143366 687491 40835 1066438 1626250 372994 2147807 396027 1324980
## x16
## 1 4292154
## 2 12689736
## 3 1963300
## 4 3802872
## 5 10467629
## 6 1185492
## 7 3570272
## 8 48683861
## 9 34490835
## 10 39500851
## 11 5001664
## 12 4182695
## 13 2660209
## 14 3648835
## 15 716407
## 16 1459873
## 17 2136521
## 18 8370485
## 19 1232632
## 20 1773776
## 21 5281314
## 22 5013687
## 23 5371519
## 24 937458
## 25 3322526
## 26 6814909
## 27 1355554
## 28 8771970
## 29 3010443
## 30 2653654
## 31 2484392
## 32 5382077
## 33 8370320
## 34 14415391
Fitur Dataset:
Provinsi : Nama Provinsi
x1 : Literacy Rate for Population age 15 and over (in % of total population)
x2 : Monthly Per Capita Household Education Expenditure (in IDR)
x3 : Number of people employed
x4 : Number of people employed in agriculture, forestry and fishery
x5 : Number of people employed in construction sector
x6 : Number of people employed in electricity and utilities sector
x7 : Number of people employed in financial services sector
x8 : Number of people employed in industrial sector
x9 : Number of people employed in mining and quarrying sector
x10 : Number of people employed in social services sector
x11 : Number of people employed in trade, hotel and restaurant sector
x12 : Number of people employed in transportation and telecommunication sector
x13 : Number of people underemployed
x14 : Number of people unemployed
x15 : Number of people living below the poverty line (in number of people)
x16 : Total Population (in number of people)
Histogram
# Three histograms side by side: literacy rate (x1), monthly per-capita
# education expenditure (x2) and number of people employed (x3).
par(mfrow = c(1, 3))
hist(employment$x1, breaks = 5, col = "maroon",
     main = "Tingkat Literasi", xlab = "Tingkat Literasi")
# Education expenditure is x2. The original plotted x12 (transportation and
# telecommunication employment) under this title.
hist(employment$x2, breaks = 10, col = "maroon",
     main = "Pengeluaran Pendidikan", xlab = "Pengeluaran Pendidikan")
# Number of people employed is x3. The original plotted x13 (underemployed)
# under this title.
hist(employment$x3, breaks = 5, col = "maroon",
     main = "Jumlah Penduduk Bekerja", xlab = "Jumlah Penduduk Bekerja")
Boxplot
# Horizontal boxplots for every indicator x1 .. x16, four per page in a 2 x 2
# grid. The original repeated the call sixteen times and had the page-changing
# par() call glued to the end of the previous boxplot() call, which does not
# parse; the loop draws the same sixteen panels in the same order.
indicator_names <- paste0("x", 1:16)
pages <- split(indicator_names, ceiling(seq_along(indicator_names) / 4))
for (page in pages) {
  par(mfrow = c(2, 2))
  for (indicator in page) {
    boxplot(employment[[indicator]], horizontal = TRUE, col = "maroon",
            xlab = indicator)
  }
}
Density Plot
# Compare kernel density estimates of the literacy rate (x1) for five kernels,
# each overlaid on the density-scaled histogram, in a 2 x 3 grid. The sixth
# panel shows the default density() estimate on its own.
par(mfrow = c(2, 3))
kernels <- c("epanechnikov", "gaussian", "rectangular", "triangular",
             "biweight")
for (kernel in kernels) {
  # Fixed bandwidth so that only the kernel shape differs between panels.
  dense <- density(employment$x1, bw = 1, kernel = kernel)
  # The kernel name is used as the panel title; the original left every title
  # empty, so the five panels could not be told apart.
  hist(employment$x1, freq = FALSE, breaks = 10, col = "maroon",
       main = kernel, xlab = "x1")
  # `main` is not a parameter of lines(), so it is no longer passed here.
  lines(dense, col = "blue", lwd = 2)
}
plot(density(employment$x1), main = "density plot employment$x1")
Barplot
# Per-province summary table: literacy rate, education expenditure, employed
# head count and total population, plus two derived columns:
#   proporsi - employed people as a share of the total population
#   JumLit   - literacy rate (%) applied to the total population, rounded
# Rows are sorted by proporsi in ascending order.
avgemployed <- employment %>%
  select(
    Provinsi,
    TingkatLiterasi = x1,
    PengeluaranPendidikan = x2,
    Employed = x3,
    TotalPop = x16
  ) %>%
  mutate(
    proporsi = Employed / TotalPop,
    JumLit = round((TingkatLiterasi * TotalPop) / 100)
  ) %>%
  arrange(proporsi)
# First ten rows of the sorted table, i.e. the lowest employed shares.
print(avgemployed[1:10, ])## Provinsi TingkatLiterasi PengeluaranPendidikan Employed
## 1 Maluku 99.21793 30812.15 700143
## 2 Nanggroe Aceh Darussalam 98.03094 29492.09 2203717
## 3 Maluku Utara 98.75914 26670.76 515615
## 4 Banten 97.62059 60194.30 5332496
## 5 Kepulauan Riau 98.87151 67925.41 901019
## 6 Jawa Barat 98.48031 56686.69 20779888
## 7 Riau 99.20377 41763.26 2915597
## 8 Nusa Tenggara Barat 87.42448 30520.99 2154124
## 9 Sulawesi Selatan 91.80612 36687.36 3774924
## 10 Sulawesi Utara 99.87202 31148.90 1095145
## TotalPop proporsi JumLit
## 1 1773776 0.3947189 1759904
## 2 5281314 0.4172668 5177322
## 3 1232632 0.4183041 1217337
## 4 12689736 0.4202212 12387795
## 5 2136521 0.4217225 2112411
## 6 48683861 0.4268332 47944017
## 7 6814909 0.4278263 6760646
## 8 5013687 0.4296487 4383190
## 9 8771970 0.4303394 8053205
## 10 2484392 0.4408101 2481212
# Print rows 25-34 of avgemployed. The table is sorted by proporsi in
# ascending order, so with the 34 rows shown in the output these are the ten
# provinces with the highest employed share.
print(avgemployed[25:34,])## Provinsi TingkatLiterasi PengeluaranPendidikan Employed TotalPop
## 25 Sulawesi Tengah 97.87339 28868.38 1451491 3010443
## 26 Kalimantan Selatan 98.41819 40914.86 2021666 4182695
## 27 Lampung 96.93179 38558.33 4060377 8370485
## 28 Kalimantan Tengah 99.20855 33262.28 1301002 2660209
## 29 Bengkulu 97.90984 46317.53 963463 1963300
## 30 Jawa Tengah 93.44631 47979.62 17245548 34490835
## 31 Jawa Timur 91.84785 52637.33 20449949 39500851
## 32 Papua 76.78815 22421.34 1777207 3322526
## 33 DI Yogyakarta 94.82706 104552.92 2118392 3802872
## 34 Bali 92.98390 62693.61 2490870 4292154
## proporsi JumLit
## 25 0.4821520 2946423
## 26 0.4833405 4116533
## 27 0.4850826 8113661
## 28 0.4890601 2639155
## 29 0.4907365 1922264
## 30 0.5000038 32230413
## 31 0.5177091 36280684
## 32 0.5348963 2551306
## 33 0.5570506 3606152
## 34 0.5803310 3991012
# Bar charts comparing the five provinces with the lowest employed share
# (rows 1-5 of avgemployed) against the five with the highest (rows 30-34).
# The short labels in `prov` are typed by hand and must follow the row order
# of avgemployed.
rows_low <- 1:5
rows_high <- 30:34

# Education expenditure. Both panels share one y range taken from the data.
# The original fixed limit of 63000 cut off DI Yogyakarta (104552.92) and
# Kepulauan Riau (67925.41).
spend_ylim <- c(0, 1.05 * max(
  avgemployed$PengeluaranPendidikan[c(rows_low, rows_high)], na.rm = TRUE
))
par(mfrow = c(2, 1))
prov <- c("Maluku", "NAD", "Malut", "Banten", "Kepri")
barplot(avgemployed$PengeluaranPendidikan[rows_low],
        names.arg = prov,
        ylab = "Pengeluaran Pendidikan", xlab = "Provinsi",
        col = "maroon", ylim = spend_ylim)
prov <- c("Jateng", "Jatim", "Papua", "DIY", "Bali")
barplot(avgemployed$PengeluaranPendidikan[rows_high],
        names.arg = prov,
        ylab = "Pengeluaran Pendidikan", xlab = "Provinsi",
        col = "maroon", ylim = spend_ylim)

# Number of literate people (JumLit). This is a head count, not a rate, so the
# axis label says so. The original limit of 3e+07 cut off Jawa Tengah and
# Jawa Timur, both above 32 million.
lit_ylim <- c(0, 1.05 * max(
  avgemployed$JumLit[c(rows_low, rows_high)], na.rm = TRUE
))
par(mfrow = c(2, 1))
prov <- c("Maluku", "NAD", "Malut", "Banten", "Kepri")
barplot(avgemployed$JumLit[rows_low],
        names.arg = prov,
        ylab = "Jumlah Penduduk Melek Huruf", xlab = "Provinsi",
        col = "navy", ylim = lit_ylim)
prov <- c("Jateng", "Jatim", "Papua", "DIY", "Bali")
barplot(avgemployed$JumLit[rows_high],
        names.arg = prov,
        ylab = "Jumlah Penduduk Melek Huruf", xlab = "Provinsi",
        col = "navy", ylim = lit_ylim)
Pie Chart
# Employment by sector for Bali, drawn as a pie chart.
# The province name is trimmed before comparison so the filter works whether
# or not the raw value carries trailing whitespace (the original matched the
# literal "Bali " with a trailing space). x16 was selected but never used, so
# it is no longer carried along.
sector <- employment %>%
  filter(trimws(Provinsi) == "Bali") %>%
  select(x4, x5, x6, x7, x8, x9, x10, x11, x12)
# The pie needs exactly one row; fail loudly instead of drawing a wrong chart.
stopifnot(nrow(sector) == 1)
sector## x4 x5 x6 x7 x8 x9 x10 x11 x12
## 1 501235 158190 11743 122081 364685 6073 426465 820638 79760
# The five sectors shown individually, with the remaining four (utilities,
# mining, financial services, transportation) pooled into "Etc".
x <- c(sector$x11, sector$x4, sector$x10, sector$x8, sector$x5,
       sector$x6 + sector$x9 + sector$x7 + sector$x12)
lab <- c("Trade, Hotel, Restaurant", "Agriculture, Forestry, Fishery",
         "Social Services", "Industrial", "Construction", "Etc")
pie(x, labels = lab)
Scatter Plot
# Two scatter plots of the number of people below the poverty line (x15, on
# the y axis) against (left) the number of underemployed people (x13) and
# (right) the total population (x16).
# The original had the x and y labels swapped and called x13 "Unemployed".
# According to the feature list x13 is the underemployed count (x14 is the
# unemployed count), so the labels now describe the plotted columns.
par(mfrow = c(1, 2))
plot(employment$x13, employment$x15,
     xlab = "Number of People Underemployed",
     ylab = "Number of People Live Below the Poverty Line",
     pch = 19, col = "maroon", cex = 1.5)
plot(employment$x16, employment$x15,
     xlab = "Total Population",
     ylab = "Number of People Live Below the Poverty Line",
     pch = 19, col = "maroon", cex = 1.5)
Matrix correlation
# Correlation matrix with histograms on the diagonal for x1, x2 and x13-x16.
# These are the columns left after dropping Provinsi and x3-x12 by position.
employment %>%
  select(x1, x2, x13:x16) %>%
  chart.Correlation(histogram = TRUE, pch = 19)
Bubble Plot
# Bubble plot: people below the poverty line (x15) against underemployed
# people (x13), both in millions, with bubble size showing total population
# (x16, millions) and one colour per province.
employment %>%
  mutate(x15 = x15 / 1e6, x13 = x13 / 1e6, x16 = x16 / 1e6) %>%
  ggplot(aes(x = x15, y = x13, size = x16, color = Provinsi)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(1.4, 19), name = "Population (M)") +
  # `guide = FALSE` is deprecated; "none" is the supported way to hide it.
  scale_color_viridis(discrete = TRUE, guide = "none") +
  # Axis titles replace the raw column names x15 / x13.
  labs(x = "People below the poverty line (M)",
       y = "People underemployed (M)") +
  theme_ipsum() +
  theme(legend.position = "bottom") +
  # max.overlaps = Inf labels every province; the default dropped 28 labels.
  geom_text_repel(aes(label = Provinsi), size = 4, max.overlaps = Inf)
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning: ggrepel: 28 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
Local Regression
# Local regression (loess) of the number of people below the poverty line
# (x15) on the number of underemployed people (x13), fitted with three spans
# to show how the span controls smoothness.
data <- employment
unemp <- range(data$x13)
# x13 is the underemployed count per the feature list, so the axis label says
# "Underemployed" (the original said "Unemployed").
plot(data$x13, data$x15, xlim = unemp, cex = .5, col = "green",
     xlab = "Number of People Underemployed",
     ylab = "Number of People Live Below Poverty Line")
title("Local Regression ")
fit <- loess(x15 ~ x13, span = .2, data = data)
fit2 <- loess(x15 ~ x13, span = .5, data = data)
fit3 <- loess(x15 ~ x13, span = .75, data = data)
# Evaluate the fits on a fixed number of points. The original used seq() with
# its default step of 1 across the whole range of x13 (millions of points),
# which is slow and adds nothing visible to the curves.
x13.grid <- seq(from = unemp[1], to = unemp[2], length.out = 500)
lines(x13.grid, predict(fit, data.frame(x13 = x13.grid)),
      col = "maroon", lwd = 2)
lines(x13.grid, predict(fit2, data.frame(x13 = x13.grid)),
      col = "orange", lwd = 2)
lines(x13.grid, predict(fit3, data.frame(x13 = x13.grid)),
      col = "navy", lwd = 2)
# PerformanceAnalytics masks legend(); call the graphics version explicitly.
graphics::legend("topright",
                 legend = c("Span = 0.2", "Span = 0.5", "Span = 0.75"),
                 col = c("maroon", "orange", "navy"),
                 lty = 1, lwd = 2, cex = .8)