Import Dataset

Load packages

Sebelum mengimpor data dan melakukan langkah lainnya, kita terlebih dahulu mengaktifkan packages yang dibutuhkan.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v stringr 1.4.0
## v tidyr   1.1.3     v forcats 0.5.1
## v readr   2.0.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Data yang digunakan berasal dari dataset HairEyeColor di R. Dataset ini berisi tabulasi silang warna rambut, warna mata, dan jenis kelamin dari 592 mahasiswa statistik di University of Delaware yang dilaporkan oleh Snee (1974).

# Load the dataset: flatten the HairEyeColor contingency table into a
# long data frame with one row per Hair/Eye/Sex combination and its count.
ds <- as.data.frame(HairEyeColor)

head digunakan untuk melihat enam baris pertama pada tabel

# Preview the first six rows
head(ds, n = 6L)
##    Hair   Eye  Sex Freq
## 1 Black Brown Male   32
## 2 Brown Brown Male   53
## 3   Red Brown Male   10
## 4 Blond Brown Male    3
## 5 Black  Blue Male   11
## 6 Brown  Blue Male   50

tail digunakan untuk melihat enam baris terakhir pada tabel

# Show the last six rows of the data frame
tail(ds)
##     Hair   Eye    Sex Freq
## 27   Red Hazel Female    7
## 28 Blond Hazel Female    5
## 29 Black Green Female    2
## 30 Brown Green Female   14
## 31   Red Green Female    7
## 32 Blond Green Female    8

Selecting columns using select()

Select a set of columns: Memilih kolom Hair dan Eye

# Keep only the Hair and Eye columns, then preview the first rows
ds %>%
  select(Hair, Eye) %>%
  head()
##    Hair   Eye
## 1 Black Brown
## 2 Brown Brown
## 3   Red Brown
## 4 Blond Brown
## 5 Black  Blue
## 6 Brown  Blue

Untuk memilih(select) semua kolom kecuali salah satu kolom, gunakan operator “-” (subtraction) (also known as negative indexing)

# Drop the Freq column (negative selection), then preview the first rows
ds %>%
  select(-Freq) %>%
  head()
##    Hair   Eye  Sex
## 1 Black Brown Male
## 2 Brown Brown Male
## 3   Red Brown Male
## 4 Blond Brown Male
## 5 Black  Blue Male
## 6 Brown  Blue Male

Untuk memilih range kolom berdasarkan nama variabel, dengan menggunakan operator “:” (colon)

# Select the contiguous range of columns from Hair through Sex
ds %>%
  select(Hair:Sex) %>%
  head()
##    Hair   Eye  Sex
## 1 Black Brown Male
## 2 Brown Brown Male
## 3   Red Brown Male
## 4 Blond Brown Male
## 5 Black  Blue Male
## 6 Brown  Blue Male

Selecting rows using filter()

Filter baris untuk memilih banyaknya mahasiswa (Freq) >= 10 orang

# Keep rows whose count is at least 10
ds %>%
  filter(Freq >= 10)
##     Hair   Eye    Sex Freq
## 1  Black Brown   Male   32
## 2  Brown Brown   Male   53
## 3    Red Brown   Male   10
## 4  Black  Blue   Male   11
## 5  Brown  Blue   Male   50
## 6    Red  Blue   Male   10
## 7  Blond  Blue   Male   30
## 8  Black Hazel   Male   10
## 9  Brown Hazel   Male   25
## 10 Brown Green   Male   15
## 11 Black Brown Female   36
## 12 Brown Brown Female   66
## 13   Red Brown Female   16
## 14 Brown  Blue Female   34
## 15 Blond  Blue Female   64
## 16 Brown Hazel Female   29
## 17 Brown Green Female   14

Filter baris untuk menampilkan mata (Eye) yang berwarna Brown atau Green

# Keep rows whose eye colour is one of the listed values
ds %>%
  filter(Eye %in% c("Brown", "Green"))
##     Hair   Eye    Sex Freq
## 1  Black Brown   Male   32
## 2  Brown Brown   Male   53
## 3    Red Brown   Male   10
## 4  Blond Brown   Male    3
## 5  Black Green   Male    3
## 6  Brown Green   Male   15
## 7    Red Green   Male    7
## 8  Blond Green   Male    8
## 9  Black Brown Female   36
## 10 Brown Brown Female   66
## 11   Red Brown Female   16
## 12 Blond Brown Female    4
## 13 Black Green Female    2
## 14 Brown Green Female   14
## 15   Red Green Female    7
## 16 Blond Green Female    8

Arrange rows using arrange()

Mengurutkan berdasarkan Freq dari nilai terkecil

# Sort ascending by count and preview the smallest cells
ds %>%
  arrange(Freq) %>%
  head()
##    Hair   Eye    Sex Freq
## 1 Black Green Female    2
## 2 Blond Brown   Male    3
## 3 Black Green   Male    3
## 4 Blond Brown Female    4
## 5 Blond Hazel   Male    5
## 6 Black Hazel Female    5

Create new columns using mutate()

Membuat kolom baru bernama Prop_Freq, yaitu nilai proporsi dari variabel Freq

# Add each row's share of the total count. The denominator is computed
# from the data (sum(Freq)) instead of a hard-coded total.
ds %>%
  mutate(Prop_Freq = Freq / sum(Freq)) %>%
  head()
##    Hair   Eye  Sex Freq   Prop_Freq
## 1 Black Brown Male   32 0.054054054
## 2 Brown Brown Male   53 0.089527027
## 3   Red Brown Male   10 0.016891892
## 4 Blond Brown Male    3 0.005067568
## 5 Black  Blue Male   11 0.018581081
## 6 Brown  Blue Male   50 0.084459459

Menggabungkan Fungsi

Menggabungkan beberapa fungsi sebelumnya dalam satu tabel

# Keep the hair/eye counts, attach each row's share of all students, then
# keep the larger cells sorted by hair colour and descending count.
# Prop is computed before filter() so its denominator is the total over
# every row (not just the filtered ones), without hard-coding that total.
ds %>%
  select(Hair, Eye, Freq) %>%
  mutate(Prop = Freq / sum(Freq)) %>%
  filter(Freq >= 15) %>%
  arrange(Hair, desc(Freq))
##     Hair   Eye Freq       Prop
## 1  Black Brown   36 0.06081081
## 2  Black Brown   32 0.05405405
## 3  Brown Brown   66 0.11148649
## 4  Brown Brown   53 0.08952703
## 5  Brown  Blue   50 0.08445946
## 6  Brown  Blue   34 0.05743243
## 7  Brown Hazel   29 0.04898649
## 8  Brown Hazel   25 0.04222973
## 9  Brown Green   15 0.02533784
## 10   Red Brown   16 0.02702703
## 11 Blond  Blue   64 0.10810811
## 12 Blond  Blue   30 0.05067568

Menggabungkan beberapa fungsi summarise dalam satu tabel

# Per hair colour: mean, minimum and maximum cell count, plus the number
# of rows in the group (n() counts rows, not students).
ds %>%
  group_by(Hair) %>%
  summarise(
    avgf = mean(Freq),
    minf = min(Freq),
    maxf = max(Freq),
    total = n()
  )
## # A tibble: 4 x 5
##   Hair   avgf  minf  maxf total
##   <fct> <dbl> <dbl> <dbl> <int>
## 1 Black 13.5      2    36     8
## 2 Brown 35.8     14    66     8
## 3 Red    8.88     7    16     8
## 4 Blond 15.9      3    64     8