Library Setup
# Package
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(ggthemes)
Read Data
# Load the CSV into a data frame. The arguments spelled out below are
# read.csv()'s defaults, written explicitly for readability.
# NOTE(review): the absolute Windows path only resolves on the author's
# machine -- confirm the intended location before reusing this script.
datapenduduk <- read.csv(
  file = "E:/MK12/Statistik/data-penduduk.csv",
  header = TRUE,
  sep = ","
)
View Data
# Preview the first six rows (head()'s default) of the loaded data.
head(datapenduduk, n = 6L)
## jenis_kelamin jam_kerja jml_penduduk total_jml persentase
## 1 laki-laki 0* 16155 27853 1,07
## 2 perempuan 0* 11698 NA
## 3 laki-laki 1 - 14 72365 166039 6,37
## 4 perempuan 1 - 14 93674 NA
## 5 laki-laki 15 -34 341730 709701 27,22
## 6 perempuan 15 -34 367971 NA
Cleansing Data
# Compact overview of the data frame: row/column counts, each column's type
# and its first few values.
dplyr::glimpse(datapenduduk)
## Rows: 8
## Columns: 5
## $ jenis_kelamin <chr> "laki-laki", " perempuan ", "laki-laki", " perempuan ", …
## $ jam_kerja <chr> "0*", "0*", "1 - 14", "1 - 14", "15 -34", "15 -34", "35 …
## $ jml_penduduk <int> 16155, 11698, 72365, 93674, 341730, 367971, 993530, 7099…
## $ total_jml <int> 27853, NA, 166039, NA, 709701, NA, 1703477, NA
## $ persentase <chr> "1,07", "", "6,37", "", "27,22", "", "65,34", ""
Cek Missing Value
# Count NA entries per column. Only true NA values are counted: the empty
# strings in the character column `persentase` are not NA and contribute 0.
datapenduduk %>%
  is.na() %>%
  colSums()
## jenis_kelamin jam_kerja jml_penduduk total_jml persentase
## 0 0 0 4 0
Visualisasi Data
# Scatter plot of jml_penduduk (x) against jenis_kelamin (y), one fully
# opaque point per row.
ggplot(datapenduduk, aes(x = jml_penduduk, y = jenis_kelamin)) +
  geom_point(alpha = 1)

# Same scatter plot as a fixed-colour variant: every point is drawn in blue
# (a constant set outside aes(), so no legend is produced).
ggplot(datapenduduk, aes(x = jml_penduduk, y = jenis_kelamin)) +
  geom_point(colour = "blue", alpha = 1)

# Scatter plot with point colour mapped to jam_kerja, so each jam_kerja
# category gets its own colour and a legend entry.
ggplot(datapenduduk, aes(x = jml_penduduk, y = jenis_kelamin)) +
  geom_point(mapping = aes(colour = jam_kerja), alpha = 1)

# Kernel density estimate of jml_penduduk using the default bandwidth.
ggplot(data = datapenduduk, mapping = aes(x = jml_penduduk)) +
  geom_density()

# Kernel density estimate of jml_penduduk with the bandwidth scaled to one
# fifth of the default, giving a less smoothed curve.
ggplot(data = datapenduduk, mapping = aes(x = jml_penduduk)) +
  geom_density(adjust = 0.2)

# Density of jml_penduduk, one curve per jenis_kelamin, with the x axis
# zoomed to 10,000-900,000.
# coord_cartesian() only zooms the view. xlim() instead sets scale limits,
# which discards the row above 900,000 before the density is estimated
# (the former "Removed 1 row containing non-finite ..." warning) and so
# changes the shape of the curve.
ggplot(datapenduduk, aes(jml_penduduk, colour = jenis_kelamin)) +
  geom_density() +
  coord_cartesian(xlim = c(10000, 900000))

# Histogram of jml_penduduk with 5000-wide bins, dark-red bars and outlines,
# custom axis labels and a horizontally centred title.
ggplot(datapenduduk, aes(x = jml_penduduk)) +
  geom_histogram(binwidth = 5000, fill = "darkred", colour = "darkred") +
  labs(
    title = "Histogram Penduduk Yang Bekerja",
    x = "Jumlah Penduduk",
    y = "Total"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# Box plot of jml_penduduk for each jenis_kelamin value.
ggplot(datapenduduk, aes(x = jenis_kelamin, y = jml_penduduk)) +
  geom_boxplot()

# Box plot of jml_penduduk per jenis_kelamin with the raw observations
# overlaid as jittered tomato-coloured points. The box plot's own outlier
# markers are hidden (outlier.shape = NA) so no observation is drawn twice.
ggplot(datapenduduk, aes(x = jenis_kelamin, y = jml_penduduk)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(colour = "tomato", alpha = 1)
