This session will discuss the geographic conditions of DKI Jakarta Province in 2013. Below are some of the libraries that we need
library(tidyverse)
library(reshape2)
library(readxl)
Download the data here: dataset
Read the file dkikepadatankelurahan2013 into R:
# Load the workbook from the home directory into `data`
# (read_excel() reads the first sheet unless told otherwise).
data <- read_excel(path = "~/dkikepadatankelurahan2013.xlsx")
We can inspect the type of each column:
# Print one line per column: its name, its type and its first few values.
data %>%
  glimpse()
## Rows: 267
## Columns: 37
## $ TAHUN <dbl> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 20...
## $ `NAMA PROVINSI` <chr> "PROVINSI DKI JAKARTA", "PROVINSI DKI JAKART...
## $ `NAMA KABUPATEN/KOTA` <chr> "KAB.ADM.KEP.SERIBU", "KAB.ADM.KEP.SERIBU", ...
## $ `NAMA KECAMATAN` <chr> "KEP. SERIBU UTR", "KEP. SERIBU UTR", "KEP. ...
## $ `NAMA KELURAHAN` <chr> "P. PANGGANG", "P. KELAPA", "P. HARAPAN", "P...
## $ `LUAS WILAYAH (KM2)` <dbl> 0.91, 3.76, 3.59, 0.59, 1.57, 1.39, 2.58, 1....
## $ `KEPADATAN (JIWA/KM2)` <dbl> 6779, 1705, 628, 3625, 3084, 1968, 1350, 145...
## $ ...8 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...9 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...10 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...11 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...12 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...13 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...14 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...15 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...16 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...17 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...18 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ ...19 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ `35-39 Laki-Laki` <dbl> 231, 84, 255, 199, 98, 113, 166, 850, 954, 7...
## $ `35-39 Perempuan` <dbl> 235, 88, 238, 185, 75, 112, 174, 748, 920, 6...
## $ `40-44 Laki-Laki` <dbl> 233, 99, 232, 178, 73, 108, 130, 749, 914, 6...
## $ `40-44 Perempuan` <dbl> 210, 88, 234, 176, 94, 80, 165, 798, 943, 69...
## $ `45-49 Laki-Laki` <dbl> 171, 72, 212, 162, 67, 66, 176, 779, 871, 65...
## $ `45-49 Perempuan` <dbl> 158, 63, 193, 139, 69, 62, 162, 766, 823, 63...
## $ `50-54 Laki-Laki` <dbl> 137, 34, 150, 100, 60, 61, 129, 715, 736, 61...
## $ `50-54 Perempuan` <dbl> 126, 29, 161, 119, 40, 63, 97, 662, 679, 514...
## $ `55-59 Laki-Laki` <dbl> 98, 30, 139, 97, 37, 37, 108, 614, 680, 539,...
## $ `55-59 Perempuan` <dbl> 106, 39, 101, 83, 32, 36, 90, 537, 510, 466,...
## $ `60-64 Laki-Laki` <dbl> 72, 29, 73, 58, 22, 32, 88, 555, 544, 428, 3...
## $ `60-64 Perempuan` <dbl> 65, 24, 56, 56, 13, 26, 42, 343, 421, 279, 2...
## $ `65-69 Laki-Laki` <dbl> 36, 12, 18, 40, 18, 21, 68, 413, 398, 328, 2...
## $ `65-69 Perempuan` <dbl> 33, 21, 35, 54, 15, 14, 34, 215, 235, 160, 1...
## $ `70-74 Laki-Laki` <dbl> 33, 13, 24, 26, 10, 17, 37, 259, 241, 215, 1...
## $ `70-74 Perempuan` <dbl> 20, 5, 25, 27, 18, 11, 32, 142, 132, 116, 10...
## $ `>75 Laki-Laki` <dbl> 13, 5, 18, 16, 11, 8, 34, 214, 215, 150, 136...
## $ `>75 Perempuan` <dbl> 27, 8, 26, 13, 17, 7, 23, 165, 159, 121, 72,...
As we can see, some columns contain only missing values, so we are going to remove those columns.
# Drop the columns at positions 8 through 19 and keep all the others.
data <- select(data, -(8:19))
Columns 8 to 19 are empty, which is why the command above removes them.
After removing the empty columns, we check that no missing values remain in the data.
# Count the NA cells in every remaining column.
data %>%
  is.na() %>%
  colSums()
## TAHUN NAMA PROVINSI NAMA KABUPATEN/KOTA
## 0 0 0
## NAMA KECAMATAN NAMA KELURAHAN LUAS WILAYAH (KM2)
## 0 0 0
## KEPADATAN (JIWA/KM2) 35-39 Laki-Laki 35-39 Perempuan
## 0 0 0
## 40-44 Laki-Laki 40-44 Perempuan 45-49 Laki-Laki
## 0 0 0
## 45-49 Perempuan 50-54 Laki-Laki 50-54 Perempuan
## 0 0 0
## 55-59 Laki-Laki 55-59 Perempuan 60-64 Laki-Laki
## 0 0 0
## 60-64 Perempuan 65-69 Laki-Laki 65-69 Perempuan
## 0 0 0
## 70-74 Laki-Laki 70-74 Perempuan >75 Laki-Laki
## 0 0 0
## >75 Perempuan
## 0
We want to change the data format from wide to long to make it easier to analyze.
# Reshape wide -> long: columns 1-7 are kept as identifiers, while columns
# 8-25 are stacked into a "kategori" label column and a "TOTAL" value column.
data <- melt(
  data,
  id.vars = names(data)[1:7],
  measure.vars = names(data)[8:25],
  variable.name = "kategori",
  value.name = "TOTAL"
)
head(data)
## TAHUN NAMA PROVINSI NAMA KABUPATEN/KOTA NAMA KECAMATAN NAMA KELURAHAN
## 1 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU UTR P. PANGGANG
## 2 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU UTR P. KELAPA
## 3 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU UTR P. HARAPAN
## 4 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU SLT P. UNTUNG JAWA
## 5 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU SLT P. TIDUNG
## 6 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU SLT P. PARI
## LUAS WILAYAH (KM2) KEPADATAN (JIWA/KM2) kategori TOTAL
## 1 0.91 6779 35-39 Laki-Laki 231
## 2 3.76 1705 35-39 Laki-Laki 84
## 3 3.59 628 35-39 Laki-Laki 255
## 4 0.59 3625 35-39 Laki-Laki 199
## 5 1.57 3084 35-39 Laki-Laki 98
## 6 1.39 1968 35-39 Laki-Laki 113
Before separating the kategori column, we must check whether the separator is the same in every value.
# List every distinct kategori label next to its number of characters.
data.frame(kategori = unique(data$kategori)) %>%
  mutate(jumlah_character = nchar(as.character(kategori)))
## kategori jumlah_character
## 1 35-39 Laki-Laki 15
## 2 35-39 Perempuan 15
## 3 40-44 Laki-Laki 15
## 4 40-44 Perempuan 15
## 5 45-49 Laki-Laki 15
## 6 45-49 Perempuan 15
## 7 50-54 Laki-Laki 15
## 8 50-54 Perempuan 15
## 9 55-59 Laki-Laki 15
## 10 55-59 Perempuan 15
## 11 60-64 Laki-Laki 15
## 12 60-64 Perempuan 15
## 13 65-69 Laki-Laki 15
## 14 65-69 Perempuan 15
## 15 70-74 Laki-Laki 15
## 16 70-74 Perempuan 15
## 17 >75 Laki-Laki 13
## 18 >75 Perempuan 14
The two >75 labels should have the same length, because Laki-Laki and Perempuan both have 9 characters, yet >75 Perempuan has 14 characters while >75 Laki-Laki has 13. This indicates that >75 Perempuan contains a repeated space. We can fix it using gsub.
# Collapse every run of two or more spaces in kategori into a single space.
data <- data %>%
  mutate(kategori = gsub("[ ]{2,}", " ", kategori))
Now we want to separate the kategori column into two columns.
# Split kategori at the space into an age band (UMUR) and a gender
# (JENIS KELAMIN) column, then preview the result.
data <- separate(
  data,
  col = kategori,
  into = c("UMUR", "JENIS KELAMIN"),
  sep = " "
)
head(data)
## TAHUN NAMA PROVINSI NAMA KABUPATEN/KOTA NAMA KECAMATAN NAMA KELURAHAN
## 1 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU UTR P. PANGGANG
## 2 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU UTR P. KELAPA
## 3 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU UTR P. HARAPAN
## 4 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU SLT P. UNTUNG JAWA
## 5 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU SLT P. TIDUNG
## 6 2013 PROVINSI DKI JAKARTA KAB.ADM.KEP.SERIBU KEP. SERIBU SLT P. PARI
## LUAS WILAYAH (KM2) KEPADATAN (JIWA/KM2) UMUR JENIS KELAMIN TOTAL
## 1 0.91 6779 35-39 Laki-Laki 231
## 2 3.76 1705 35-39 Laki-Laki 84
## 3 3.59 628 35-39 Laki-Laki 255
## 4 0.59 3625 35-39 Laki-Laki 199
## 5 1.57 3084 35-39 Laki-Laki 98
## 6 1.39 1968 35-39 Laki-Laki 113
1) We want to know the total for each “JENIS KELAMIN” in each “KABUPATEN/KOTA”
# Sum TOTAL for every district/city and gender combination.
plot1 <- data %>%
  group_by(`NAMA KABUPATEN/KOTA`, `JENIS KELAMIN`) %>%
  summarise(TOTAL = sum(TOTAL))
head(plot1)
## # A tibble: 6 x 3
## # Groups: NAMA KABUPATEN/KOTA [3]
## `NAMA KABUPATEN/KOTA` `JENIS KELAMIN` TOTAL
## <chr> <chr> <dbl>
## 1 JAKARTA BARAT Laki-Laki 1177351
## 2 JAKARTA BARAT Perempuan 1138830
## 3 JAKARTA PUSAT Laki-Laki 270490
## 4 JAKARTA PUSAT Perempuan 229566
## 5 JAKARTA SELATAN Laki-Laki 445152
## 6 JAKARTA SELATAN Perempuan 432985
# Horizontal dodged bars: one bar per gender for every district/city,
# with the districts ordered by TOTAL.
ggplot(
  plot1,
  aes(
    x = TOTAL,
    y = reorder(`NAMA KABUPATEN/KOTA`, TOTAL),
    group = `JENIS KELAMIN`,
    fill = `JENIS KELAMIN`
  )
) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("#4d94ff", "#ff4d94")) +
  xlim(0, 1250000) +
  labs(
    title = "Total Population of each District / City",
    x = NULL, y = NULL, fill = NULL
  ) +
  theme_minimal() +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = .5)
  )
Interpretation
1) West Jakarta is the city with the highest population (note that the data only covers residents aged 35 and over)
2) There are always more men than women
2) We want to compare the total area with the total population to find out the population density of each district / city
# Total area (large) and total head count (jumlah) per district/city.
#
# The area is a per-village attribute, but in the long-format data every
# village is repeated once per age/gender row. Summing the area column over
# all rows would count each village's area once for every such row, so the
# data is first collapsed to one row per village (one area value, summed
# head count) and only then totalled per district/city.
plot2 <- data %>%
  group_by(`NAMA KABUPATEN/KOTA`, `NAMA KECAMATAN`, `NAMA KELURAHAN`) %>%
  summarise(
    village_area = first(`LUAS WILAYAH (KM2)`),
    village_people = sum(TOTAL)
  ) %>%
  group_by(`NAMA KABUPATEN/KOTA`) %>%
  summarise(
    large = sum(village_area),
    jumlah = sum(village_people)
  )
head(plot2)
## # A tibble: 6 x 3
## `NAMA KABUPATEN/KOTA` large jumlah
## <chr> <dbl> <dbl>
## 1 JAKARTA BARAT 2288. 2316181
## 2 JAKARTA PUSAT 865. 500056
## 3 JAKARTA SELATAN 2623. 878137
## 4 JAKARTA TIMUR 3387. 1136447
## 5 JAKARTA UTARA 2578. 1695623
## 6 KAB.ADM.KEP.SERIBU 213. 8308
# Dashed line through the head count of each district/city; the points on
# top are sized and coloured by the district's area (green = low, red = high).
ggplot(plot2, aes(x = `NAMA KABUPATEN/KOTA`, y = jumlah)) +
  geom_line(aes(group = 1), linetype = 2, colour = "blue") +
  geom_point(aes(size = large, colour = large)) +
  scale_colour_gradient(low = "green", high = "red") +
  labs(
    title = "The Density of the Area in The District / City",
    subtitle = "The Redder the Wider of Area",
    x = "", y = NULL, colour = NULL
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(size = 5.5))
Interpretation
1) East Jakarta is the most strategic place because the population is not too high but has a large area
2) West Jakarta is the most densely populated place because of the high population and the small area. This can indicate frequent congestion in the West Jakarta area
3) We want to know Total Population in Age Category
The plot below is interactive
# Sum TOTAL for every district/city and age band.
plot3 <- data %>%
  group_by(`NAMA KABUPATEN/KOTA`, UMUR) %>%
  summarise(Jumlah = sum(TOTAL))
head(plot3)
## # A tibble: 6 x 3
## # Groups: NAMA KABUPATEN/KOTA [1]
## `NAMA KABUPATEN/KOTA` UMUR Jumlah
## <chr> <chr> <dbl>
## 1 JAKARTA BARAT >75 118645
## 2 JAKARTA BARAT 35-39 288489
## 3 JAKARTA BARAT 40-44 317385
## 4 JAKARTA BARAT 45-49 245633
## 5 JAKARTA BARAT 50-54 355395
## 6 JAKARTA BARAT 55-59 316610
# One line chart per district/city. UMUR is turned into a factor with an
# explicit level order so that ">75" is drawn last instead of first.
plot3.y <- plot3 %>%
  mutate(
    UMUR = factor(
      UMUR,
      levels = c(
        "35-39", "40-44", "45-49", "50-54", "55-59",
        "60-64", "65-69", "70-74", ">75"
      )
    )
  ) %>%
  ggplot(aes(x = UMUR, y = Jumlah, group = `NAMA KABUPATEN/KOTA`)) +
  geom_line() +
  facet_wrap(~`NAMA KABUPATEN/KOTA`) +
  labs(
    title = "Total population in age category",
    subtitle = "in units of hundreds of thousands of inhabitants",
    x = "", y = NULL
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    plot.title = element_text(hjust = .5),
    plot.subtitle = element_text(hjust = .5)
  )

# Convert the static chart to an interactive widget; the hover tooltip
# shows only the y value.
plotly::ggplotly(plot3.y, tooltip = "y")
Interpretation
1) West Jakarta and North Jakarta have the same pattern
2) The 50-54 age group is especially large in North Jakarta and West Jakarta
3) This indicates that the majority of West Jakarta and North Jakarta are not immigrants but are native Jakarta people
4) East Jakarta, Central Jakarta, South Jakarta have the same pattern, the indication is that there are more youth in the area than the elderly
5) The congestion in these 3 regions can be ascertained to be higher than in the other 3 regions
4) We want to know the Density of Each Village
The plot below is interactive
# Per village: wilayah is the mean of the (repeated) area value, jiwa is the
# summed head count over the age/gender rows, Density is people per km2.
# TOTAL only covers the age bands present in the data (35-39 through >75).
plot4 <- data %>%
  group_by(`NAMA KELURAHAN`) %>%
  summarise(
    wilayah = mean(`LUAS WILAYAH (KM2)`),
    jiwa = sum(TOTAL)
  ) %>%
  mutate(Density = jiwa / wilayah)
head(plot4)
## # A tibble: 6 x 4
## `NAMA KELURAHAN` wilayah jiwa Density
## <chr> <dbl> <dbl> <dbl>
## 1 ANCOL 5.77 29376 5091.
## 2 ANGKE 0.8 35492 44365
## 3 BALE KAMBANG 1.67 11847 7094.
## 4 BALI MESTER 0.67 5482 8182.
## 5 BAMBU APUS 3.17 10432 3291.
## 6 BANGKA 3.3 9919 3006.
# Scatter of head count (x) against area (y) for every village; point size
# and colour both encode Density (green = low, red = high).
ggplot(plot4, aes(x = jiwa, y = wilayah)) +
  geom_point(aes(size = Density, colour = Density)) +
  scale_colour_gradient(low = "green", high = "red") +
  labs(
    title = "The Population Density of Each Village",
    x = "", y = "", colour = NULL
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = .5))
Interpretation
1) Some areas have a high density; they are marked in red
2) Overall population density in Jakarta was good in 2013