Library Setup

# Package
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(ggthemes)

Read Data

# Load the population CSV into a data frame.
# NOTE(review): the absolute path only resolves on the author's machine.
datapenduduk <- read.csv(file = "E:/MK12/Statistik/data-penduduk.csv")

View Data

# Preview the first six rows of the raw data.
head(x = datapenduduk, n = 6L)
##   jenis_kelamin jam_kerja jml_penduduk total_jml persentase
## 1     laki-laki        0*        16155     27853       1,07
## 2    perempuan         0*        11698        NA           
## 3     laki-laki    1 - 14        72365    166039       6,37
## 4    perempuan     1 - 14        93674        NA           
## 5     laki-laki    15 -34       341730    709701      27,22
## 6    perempuan     15 -34       367971        NA

Cleansing Data

# Show each column's type together with its first few values.
dplyr::glimpse(datapenduduk)
## Rows: 8
## Columns: 5
## $ jenis_kelamin <chr> "laki-laki", " perempuan ", "laki-laki", " perempuan ", …
## $ jam_kerja     <chr> "0*", "0*", "1 - 14", "1 - 14", "15 -34", "15 -34", "35 …
## $ jml_penduduk  <int> 16155, 11698, 72365, 93674, 341730, 367971, 993530, 7099…
## $ total_jml     <int> 27853, NA, 166039, NA, 709701, NA, 1703477, NA
## $ persentase    <chr> "1,07", "", "6,37", "", "27,22", "", "65,34", ""

Cek Missing Value

# Count missing entries per column. Blank strings are counted alongside NA:
# empty character cells (such as those in persentase) are stored as "" rather
# than NA, so is.na() on its own would report them as present.
colSums(is.na(datapenduduk) | datapenduduk == "")
## jenis_kelamin     jam_kerja  jml_penduduk     total_jml    persentase 
##             0             0             0             4             4

Visualisasi Data

# Scatter plot: jml_penduduk on the x-axis, one row per jenis_kelamin value.
ggplot(datapenduduk, aes(jml_penduduk, jenis_kelamin)) +
  geom_point(alpha = 1)

# Same scatter plot, with every point drawn in a fixed blue colour.
ggplot(datapenduduk, aes(jml_penduduk, jenis_kelamin)) +
  geom_point(colour = "blue", alpha = 1)

# Same scatter plot, with point colour mapped to the jam_kerja category.
ggplot(datapenduduk, aes(jml_penduduk, jenis_kelamin)) +
  geom_point(aes(colour = jam_kerja), alpha = 1)

# Kernel density estimate of jml_penduduk using the default bandwidth.
ggplot(data = datapenduduk, mapping = aes(x = jml_penduduk)) +
  geom_density()

# Density of jml_penduduk with the bandwidth scaled to one fifth of the
# default, which gives a less smoothed curve.
ggplot(data = datapenduduk, mapping = aes(x = jml_penduduk)) +
  geom_density(adjust = 0.2)

# Density of jml_penduduk, one curve per jenis_kelamin, viewed on the
# 10000-900000 range.
# The zoom is done with coord_cartesian() instead of xlim(): xlim() sets scale
# limits, which discards observations outside the range (with a "removed rows"
# warning) before the density is estimated. coord_cartesian() only crops the
# view, so every observation still contributes to the estimate.
ggplot(datapenduduk, aes(jml_penduduk, colour = jenis_kelamin)) +
  geom_density() +
  coord_cartesian(xlim = c(10000, 900000))

# Histogram of jml_penduduk in 5000-wide bins, with a centred title.
ggplot(data = datapenduduk) +
  geom_histogram(
    mapping = aes(x = jml_penduduk),
    binwidth = 5000,
    colour = "darkred",
    fill = "darkred"
  ) +
  labs(
    title = "Histogram Penduduk Yang Bekerja",
    x = "Jumlah Penduduk",
    y = "Total"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# Box plot of jml_penduduk for each jenis_kelamin group.
ggplot(datapenduduk, aes(jenis_kelamin, jml_penduduk)) +
  geom_boxplot()

# Box plot with the individual observations overlaid as jittered points.
# The box plot's own outlier markers are hidden so that those observations
# are not drawn twice.
ggplot(datapenduduk, aes(jenis_kelamin, jml_penduduk)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(colour = "tomato", alpha = 1)