Eksplorasi Data INDODAPOER

Berikut ini akan dilakukan eksplorasi data Indonesia Database for Policy and Economic Research (INDODAPOER) yang berkaitan dengan ekonomi dan pendidikan.

Library

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.3
library(viridis)
## Warning: package 'viridis' was built under R version 4.1.3
## Loading required package: viridisLite
library(hrbrthemes)
## Warning: package 'hrbrthemes' was built under R version 4.1.3
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(ggrepel)
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 4.1.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.1.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.1.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend

Data

# Read the INDODAPOER economy/education extract from the local CSV export.
employment <- read.csv(
  "C:/Users/Asus/Downloads/DataEcoEduInd.csv",
  header = TRUE
)
# Drop columns 1, 2, 3 and 5 by position; the 17 names assigned below imply
# that 17 columns remain afterwards.
employment <- employment[, -c(1, 2, 3, 5)]
# Short working names: the province label followed by x1 ... x16.
names(employment) <- c("Provinsi", paste0("x", 1:16))
# Keep only complete rows, then display the cleaned table.
employment <- na.omit(employment)
print(employment)
##                      Provinsi       x1        x2       x3      x4      x5
## 1                       Bali  92.98390  62693.61  2490870  501235  158190
## 2                     Banten  97.62059  60194.30  5332496  704057  348997
## 3                   Bengkulu  97.90984  46317.53   963463  474705   50766
## 4              DI Yogyakarta  94.82706 104552.92  2118392  432057  160636
## 5                DKI Jakarta  99.71981 101267.32  4726779   15762  161229
## 6                  Gorontalo  98.62925  32173.73   555533  167142   35103
## 7                      Jambi  98.15025  30628.41  1721362  815049   92368
## 8                 Jawa Barat  98.48031  56686.69 20779888 2869492 1560645
## 9                Jawa Tengah  93.44631  47979.62 17245548 4204249 1508556
## 10                Jawa Timur  91.84785  52637.33 20449949 6643543 1444376
## 11          Kalimantan Barat  92.57867  35116.83  2346881 1195545  151211
## 12        Kalimantan Selatan  98.41819  40914.86  2021666  683195   92536
## 13         Kalimantan Tengah  99.20855  33262.28  1301002  499137   78437
## 14          Kalimantan Timur  98.95720  54967.07  1618285  347901   84908
## 15          Kalimantan Utara  95.17907  47407.23   323400   85193   17178
## 16 Kepulauan Bangka-Belitung  97.76474  48539.94   701366  217325   35619
## 17            Kepulauan Riau  98.87151  67925.41   901019   72615   85073
## 18                   Lampung  96.93179  38558.33  4060377 1731718  250333
## 19              Maluku Utara  98.75914  26670.76   515615  235800   25393
## 20                    Maluku  99.21793  30812.15   700143  257643   43255
## 21  Nanggroe Aceh Darussalam  98.03094  29492.09  2203717  865803  166824
## 22       Nusa Tenggara Barat  87.42448  30520.99  2154124  721283  148826
## 23       Nusa Tenggara Timur  91.89945  26508.09  2411533 1319772  106751
## 24               Papua Barat  97.37628  29064.02   417544  140447   24881
## 25                     Papua  76.78815  22421.34  1777207 1204116   45325
## 26                      Riau  99.20377  41763.26  2915597 1140824  170418
## 27            Sulawesi Barat  92.85590  28630.10   619395  323280   39739
## 28          Sulawesi Selatan  91.80612  36687.36  3774924 1426501  254738
## 29           Sulawesi Tengah  97.87339  28868.38  1451491  639023   80868
## 30         Sulawesi Tenggara  94.46407  32511.62  1207488  427659   77222
## 31            Sulawesi Utara  99.87202  31148.90  1095145  269884   96915
## 32            Sumatera Barat  99.06626  46983.95  2410450  836071  146766
## 33          Sumatera Selatan  98.65654  35435.12  3963870 1844251  202956
## 34            Sumatera Utara  99.06557  45685.35  6728431 2390797  353259
##        x6     x7      x8     x9     x10     x11     x12     x13     x14     x15
## 1   11743 122081  364685   6073  426465  820638   79760  583676   34857  171760
## 2   72016 350084 1267797  25039  790717 1341698  432091  974999  498661  661360
## 3    8321  18453   52647  15270  131870  185185   26246  340959   35061  301810
## 4   11311  86308  346919  18772  368735  601093   92561  604525   73350  460100
## 5   39187 499801  616293  26291 1102746 1645704  619766  606980  315919  373120
## 6    1795   9867   62332  15474  112780  111828   39212  157852   23347  198510
## 7    7858  31643   85674  44410  255851  330055   58454  622233   69281  281690
## 8  174940 857553 4349675 122945 3259409 6216561 1368668 4237769 1851759 3615790
## 9   86213 420952 3756317 107647 2071851 4439096  650667 4673832  817942 3897200
## 10 127035 486375 3247537 174812 2638173 4976285  711813 6166112  850474 4332590
## 11  10224  45369  150714  36230  288777  394230   74581  829993  104518  387080
## 12  13024  48542  178084  78053  308764  518502  100966  693141   95278  189030
## 13   5959  21894   71912  81662  217012  276854   48135  399993   54555  136930
## 14  15107  68728  115908 144717  317956  434865   88195  345415  114313  218900
## 15   2677  12007   27530  10752   77706   70137   20220   99513   17797   50350
## 16   3696  14179   49462  95721  109687  159259   16418  176343   26552   76260
## 17  11524  35935  210563  10465  179401  229272   66171  156002   69113  131680
## 18  17472  64327  365766  26416  486621  930246  187478 1482227  172334 1097050
## 19   2354   9072   28365  13471   99016   69187   32957  157582   25831   81460
## 20   5526   9831   59222  10663  138660  121771   53572  209228   54891  320080
## 21  11773  35151  171870  17702  432881  412027   89686  837826  151164  839490
## 22  21051  36120  266677  35210  316849  529158   78950  867585   83257  737460
## 23  11095  32808  216175  29922  338256  252369  104385 1123071   74825 1142170
## 24   2483   9466   27814   6434  102221   74353   29445  120289   28086  214470
## 25   4058  14792   32633  16226  239386  155407   65264  740469   59064  917630
## 26  14009  71370  217092  34958  465044  687709  114173 1042592  195720  500440
## 27   1711   6432   46390   5400   83402   97257   15784  297692   20227  151780
## 28  18803  91470  341716  24283  586847  854478  176088 1348344  213486  792630
## 29  10364  25692  109919  23061  266624  246244   49696  546029   51481  420210
## 30   6626  18894  108336  31387  237013  255582   44769  441020   40724  307100
## 31   6951  33388   99228  29055  212988  254142   92594  294936   80664  193310
## 32  14154  54525  210052  39617  370882  637272  101111  793035  141680  357130
## 33  11453  65154  308661  55283  504690  781037  190385 1463283  176260 1068270
## 34  47001 143366  687491  40835 1066438 1626250  372994 2147807  396027 1324980
##         x16
## 1   4292154
## 2  12689736
## 3   1963300
## 4   3802872
## 5  10467629
## 6   1185492
## 7   3570272
## 8  48683861
## 9  34490835
## 10 39500851
## 11  5001664
## 12  4182695
## 13  2660209
## 14  3648835
## 15   716407
## 16  1459873
## 17  2136521
## 18  8370485
## 19  1232632
## 20  1773776
## 21  5281314
## 22  5013687
## 23  5371519
## 24   937458
## 25  3322526
## 26  6814909
## 27  1355554
## 28  8771970
## 29  3010443
## 30  2653654
## 31  2484392
## 32  5382077
## 33  8370320
## 34 14415391

Fitur Dataset:

  1. Provinsi : Nama Provinsi

  2. x1 : Literacy Rate for Population age 15 and over (in % of total population)

  3. x2 : Monthly Per Capita Household Education Expenditure (in IDR)

  4. x3 : Number of people employed

  5. x4 : Number of people employed in agriculture, forestry and fishery

  6. x5 : Number of people employed in construction sector

  7. x6 : Number of people employed in electricity and utilities sector

  8. x7 : Number of people employed in financial services sector

  9. x8 : Number of people employed in industrial sector

  10. x9 : Number of people employed in mining and quarrying sector

  11. x10 : Number of people employed in social services sector

  12. x11 : Number of people employed in trade, hotel and restaurant sector

  13. x12 : Number of people employed in transportation and telecommunication sector

  14. x13 : Number of people underemployed

  15. x14 : Number of people unemployed

  16. x15 : Number of people living below the poverty line (in number of people)

  17. x16 : Total Population (in number of people)

Histogram

# Histograms of the three headline indicators, drawn side by side:
# literacy rate (x1), education expenditure (x2) and people employed (x3).
par(mfrow = c(1, 3))

hist(employment$x1, breaks = 5, col = "maroon",
     main = "Tingkat Literasi", xlab = "Tingkat Literasi")
# Education expenditure is column x2. The previous code plotted x12
# (transportation/telecommunication employment) under this title.
hist(employment$x2, breaks = 10, col = "maroon",
     main = "Pengeluaran Pendidikan", xlab = "Pengeluaran Pendidikan")
# Number of people employed is column x3. The previous code plotted x13
# (underemployed) under this title.
hist(employment$x3, breaks = 5, col = "maroon",
     main = "Jumlah Penduduk Bekerja", xlab = "Jumlah Penduduk Bekerja")

Boxplot

# Horizontal boxplots for x1 ... x16, four variables per page in a 2 x 2 grid.
# The variables are split into consecutive groups of four and the layout is
# re-declared before each group, so every group starts on a fresh page.
panel_groups <- split(paste0("x", 1:16), rep(1:4, each = 4))

for (panel_vars in panel_groups) {
  par(mfrow = c(2, 2))
  for (var_name in panel_vars) {
    boxplot(employment[[var_name]], horizontal = TRUE, col = "maroon",
            xlab = var_name)
  }
}

Density Plot

# Compare kernel density estimates of the literacy rate (x1) for several
# kernels, each overlaid on the same density-scaled histogram. The sixth panel
# shows density() with its default settings.
par(mfrow = c(2, 3))

kernels <- c("epanechnikov", "gaussian", "rectangular", "triangular",
             "biweight")

for (kernel_name in kernels) {
  # A fixed bandwidth keeps the kernels comparable with one another.
  dense <- density(employment$x1, bw = 1, kernel = kernel_name)
  # Title each panel with its kernel; untitled panels cannot be told apart.
  hist(employment$x1, freq = FALSE, breaks = 10, col = "maroon",
       main = kernel_name, xlab = "x1")
  # `main` is not a graphical parameter accepted by lines(), so it is omitted.
  lines(dense, col = "blue", lwd = 2)
}

plot(density(employment$x1), main = "density plot employment$x1")

Barplot

# Build a per-province summary: rename the selected indicators, derive the
# employed share of the population and an estimated literate head count, then
# sort ascending by employed share.
avgemployed <- employment %>%
  select(
    Provinsi,
    TingkatLiterasi = x1,
    PengeluaranPendidikan = x2,
    Employed = x3,
    TotalPop = x16
  ) %>%
  mutate(
    proporsi = Employed / TotalPop,
    JumLit = round((TingkatLiterasi * TotalPop) / 100)
  ) %>%
  arrange(proporsi)

# First ten rows, i.e. the provinces with the lowest employed share.
print(avgemployed[seq_len(10), ])
##                     Provinsi TingkatLiterasi PengeluaranPendidikan Employed
## 1                    Maluku         99.21793              30812.15   700143
## 2  Nanggroe Aceh Darussalam         98.03094              29492.09  2203717
## 3              Maluku Utara         98.75914              26670.76   515615
## 4                    Banten         97.62059              60194.30  5332496
## 5            Kepulauan Riau         98.87151              67925.41   901019
## 6                Jawa Barat         98.48031              56686.69 20779888
## 7                      Riau         99.20377              41763.26  2915597
## 8       Nusa Tenggara Barat         87.42448              30520.99  2154124
## 9          Sulawesi Selatan         91.80612              36687.36  3774924
## 10           Sulawesi Utara         99.87202              31148.90  1095145
##    TotalPop  proporsi   JumLit
## 1   1773776 0.3947189  1759904
## 2   5281314 0.4172668  5177322
## 3   1232632 0.4183041  1217337
## 4  12689736 0.4202212 12387795
## 5   2136521 0.4217225  2112411
## 6  48683861 0.4268332 47944017
## 7   6814909 0.4278263  6760646
## 8   5013687 0.4296487  4383190
## 9   8771970 0.4303394  8053205
## 10  2484392 0.4408101  2481212
# Last ten rows of the sorted table, i.e. the highest employed shares.
# tail() replaces the hard-coded 25:34, which is only right when exactly
# 34 rows survive na.omit().
print(tail(avgemployed, 10))
##               Provinsi TingkatLiterasi PengeluaranPendidikan Employed TotalPop
## 25    Sulawesi Tengah         97.87339              28868.38  1451491  3010443
## 26 Kalimantan Selatan         98.41819              40914.86  2021666  4182695
## 27            Lampung         96.93179              38558.33  4060377  8370485
## 28  Kalimantan Tengah         99.20855              33262.28  1301002  2660209
## 29           Bengkulu         97.90984              46317.53   963463  1963300
## 30        Jawa Tengah         93.44631              47979.62 17245548 34490835
## 31         Jawa Timur         91.84785              52637.33 20449949 39500851
## 32              Papua         76.78815              22421.34  1777207  3322526
## 33      DI Yogyakarta         94.82706             104552.92  2118392  3802872
## 34               Bali         92.98390              62693.61  2490870  4292154
##     proporsi   JumLit
## 25 0.4821520  2946423
## 26 0.4833405  4116533
## 27 0.4850826  8113661
## 28 0.4890601  2639155
## 29 0.4907365  1922264
## 30 0.5000038 32230413
## 31 0.5177091 36280684
## 32 0.5348963  2551306
## 33 0.5570506  3606152
## 34 0.5803310  3991012
# Bar charts comparing the five provinces with the lowest employed share (top
# panel) against the five with the highest (bottom panel). `avgemployed` is
# sorted ascending by `proporsi`, so these are its first and last five rows.

# Short axis labels for long province names; names without an entry are shown
# in full.
prov_abbrev <- c(
  "Nanggroe Aceh Darussalam" = "NAD",
  "Maluku Utara" = "Malut",
  "Kepulauan Riau" = "Kepri",
  "Jawa Tengah" = "Jateng",
  "Jawa Timur" = "Jatim",
  "DI Yogyakarta" = "DIY"
)

# Map province names to their short labels, ignoring surrounding whitespace.
short_prov <- function(provinsi) {
  provinsi <- trimws(provinsi)
  short <- unname(prov_abbrev[provinsi])
  ifelse(is.na(short), provinsi, short)
}

bottom5 <- head(avgemployed, 5)
top5 <- tail(avgemployed, 5)

# Both panels share one y-range derived from the data so the bars are
# comparable and none is cut off. The old fixed limit of 63000 was below the
# largest expenditure value being plotted.
exp_ylim <- c(0, 1.05 * max(bottom5$PengeluaranPendidikan,
                            top5$PengeluaranPendidikan))

par(mfrow = c(2, 1))
barplot(bottom5$PengeluaranPendidikan,
        names.arg = short_prov(bottom5$Provinsi),
        ylab = "Pengeluaran Pendidikan", xlab = "Provinsi",
        col = "maroon", ylim = exp_ylim)
barplot(top5$PengeluaranPendidikan,
        names.arg = short_prov(top5$Provinsi),
        ylab = "Pengeluaran Pendidikan", xlab = "Provinsi",
        col = "maroon", ylim = exp_ylim)

# JumLit is a head count (literacy rate x population / 100), not a rate, so
# the axis is labelled accordingly. The old fixed limit of 3e+07 was below the
# largest counts being plotted.
lit_ylim <- c(0, 1.05 * max(bottom5$JumLit, top5$JumLit))

par(mfrow = c(2, 1))
barplot(bottom5$JumLit,
        names.arg = short_prov(bottom5$Provinsi),
        ylab = "Jumlah Penduduk Melek Huruf", xlab = "Provinsi",
        col = "navy", ylim = lit_ylim)
barplot(top5$JumLit,
        names.arg = short_prov(top5$Provinsi),
        ylab = "Jumlah Penduduk Melek Huruf", xlab = "Provinsi",
        col = "navy", ylim = lit_ylim)

Pie Chart

# Sector employment counts (x4 ... x12) for Bali.
# The province names in the raw file carry trailing whitespace, so compare on
# the trimmed value instead of matching the literal "Bali ". This works
# whether or not the padding is present.
sector <- employment %>%
  filter(trimws(Provinsi) == "Bali") %>%
  select(x4, x5, x6, x7, x8, x9, x10, x11, x12)
sector
##       x4     x5    x6     x7     x8   x9    x10    x11   x12
## 1 501235 158190 11743 122081 364685 6073 426465 820638 79760
# Pie chart of Bali's employment by sector. The five sectors listed in `lab`
# get their own slice; x6, x9, x7 and x12 are summed into a single "Etc" slice.
lab <- c(
  "Trade, Hotel, Restaurant",
  "Agriculture, Forestry, Fishery",
  "Social Services",
  "Industrial",
  "Construction",
  "Etc"
)
x <- with(sector, c(x11, x4, x10, x8, x5, x6 + x9 + x7 + x12))
pie(x, labels = lab)

Scatter Plot

# Scatter plots of the poverty head count (x15, y-axis) against the number of
# underemployed people (x13) and against total population (x16).
# In plot(x, y) the first argument goes on the horizontal axis. The previous
# labels were swapped and called x13 "Unemployed", whereas the feature list
# documents x13 as underemployed and x14 as unemployed.
par(mfrow = c(1, 2))

plot(employment$x13, employment$x15,
     xlab = "Number of People Underemployed",
     ylab = "Number of People Live Below the Poverty Line",
     pch = 19, col = "maroon", cex = 1.5)

plot(employment$x16, employment$x15,
     xlab = "Total Population",
     ylab = "Number of People Live Below the Poverty Line",
     pch = 19, col = "maroon", cex = 1.5)

Correlation Matrix

# Pairwise correlation chart for the non-sector indicators. These are the
# columns left after excluding position 1 (Provinsi) and positions 4-13
# (x3 ... x12).
corr_vars <- c("x1", "x2", "x13", "x14", "x15", "x16")
chart.Correlation(employment[, corr_vars], histogram = TRUE, pch = 19)

Bubble Plot

# Bubble plot: people below the poverty line (x15) against underemployed
# people (x13), with bubble size showing total population (x16). All three are
# rescaled to millions for readable axes.
employment %>%
  mutate(x15 = x15 / 1000000, x13 = x13 / 1000000, x16 = x16 / 1000000) %>%
  ggplot(aes(x = x15, y = x13, size = x16, color = Provinsi)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(1.4, 19), name = "Population (M)") +
  # guide = FALSE is deprecated; "none" is the supported way to hide a guide.
  scale_color_viridis(discrete = TRUE, guide = "none") +
  labs(x = "People below the poverty line (M)",
       y = "People underemployed (M)") +
  theme_ipsum() +
  theme(legend.position = "bottom") +
  # Lift ggrepel's overlap cap so every province is labelled; under the
  # default cap most labels were dropped.
  geom_text_repel(aes(label = Provinsi), size = 4, max.overlaps = Inf)
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning: ggrepel: 28 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Local Regression

# Local (loess) regression of the poverty head count (x15) on the number of
# underemployed people (x13), fitted with three spans to show how the span
# controls smoothness.
data <- employment

unemp <- range(data$x13)

# x13 is the underemployed count per the feature list (x14 is unemployed), so
# the axis is labelled accordingly.
plot(data$x13, data$x15, xlim = unemp, cex = .5, col = "green",
     xlab = "Number of People Underemployed",
     ylab = "Number of People Live Below Poverty Line")
title("Local Regression ")

fit <- loess(x15 ~ x13, span = .2, data = data)
fit2 <- loess(x15 ~ x13, span = .5, data = data)
fit3 <- loess(x15 ~ x13, span = .75, data = data)

# Evaluate the fits on a fixed-size grid. The default step of 1 would create
# one grid point per person across the range, i.e. millions of predictions
# for a curve that needs only a few hundred points.
x13.grid <- seq(from = unemp[1], to = unemp[2], length.out = 200)

lines(x13.grid, predict(fit, data.frame(x13 = x13.grid)),
      col = "maroon", lwd = 2)
lines(x13.grid, predict(fit2, data.frame(x13 = x13.grid)),
      col = "orange", lwd = 2)
lines(x13.grid, predict(fit3, data.frame(x13 = x13.grid)),
      col = "navy", lwd = 2)

# PerformanceAnalytics masks graphics::legend once attached; call the base
# graphics version explicitly so the result does not depend on load order.
graphics::legend("topright",
                 legend = c("Span = 0.2", "Span = 0.5", "Span = 0.75"),
                 col = c("maroon", "orange", "navy"),
                 lty = 1, lwd = 2, cex = .8)