library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.3     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## Warning: package 'stringr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Mencari dokumentasi bantuan yang berkaitan dengan package tidyverse

# Search the installed help pages for entries matching "tidyverse".
help.search("tidyverse")
## starting httpd help server ... done

Dataset yang akan digunakan adalah ‘HairEyeColor’, yaitu data distribusi warna rambut, warna mata, dan jenis kelamin dari 592 siswa statistika.

Dataset ini mempunyai 3 variabel beserta levelnya: Hair (Black, Brown, Red, Blond), Eye (Brown, Blue, Hazel, Green), dan Sex (Male, Female).

# Load the built-in HairEyeColor table and replace it with a tibble
# (one row per Hair/Eye/Sex combination, with the count in column n).
library(datasets)
data("HairEyeColor")
HairEyeColor <- HairEyeColor %>%
  tibble::as_tibble()

Memunculkan 6 baris teratas

# Show the first six rows.
head(HairEyeColor, n = 6)
## # A tibble: 6 x 4
##   Hair  Eye   Sex       n
##   <chr> <chr> <chr> <dbl>
## 1 Black Brown Male     32
## 2 Brown Brown Male     53
## 3 Red   Brown Male     10
## 4 Blond Brown Male      3
## 5 Black Blue  Male     11
## 6 Brown Blue  Male     50

Menghitung rata-rata jumlah siswa menurut gender

# Mean of the count column n within each Sex; the result is ungrouped.
HairEyeColor %>%
  group_by(Sex) %>%
  summarise(mean = mean(n), .groups = "drop")
## # A tibble: 2 x 2
##   Sex     mean
##   <chr>  <dbl>
## 1 Female  19.6
## 2 Male    17.4

Menghitung rata-rata jumlah siswa menurut warna mata

# Mean of the count column n within each Eye colour; the result is ungrouped.
HairEyeColor %>%
  group_by(Eye) %>%
  summarise(mean = mean(n), .groups = "drop")
## # A tibble: 4 x 2
##   Eye    mean
##   <chr> <dbl>
## 1 Blue   26.9
## 2 Brown  27.5
## 3 Green   8  
## 4 Hazel  11.6

Menghitung rata-rata jumlah siswa menurut warna rambut

# Mean of the count column n within each Hair colour; the result is ungrouped.
HairEyeColor %>%
  group_by(Hair) %>%
  summarise(mean = mean(n), .groups = "drop")
## # A tibble: 4 x 2
##   Hair   mean
##   <chr> <dbl>
## 1 Black 13.5 
## 2 Blond 15.9 
## 3 Brown 35.8 
## 4 Red    8.88

Mengurutkan berdasarkan banyaknya siswa dari nilai terkecil

# Sort rows by the count n, smallest first.
arrange(HairEyeColor, n)
## # A tibble: 32 x 4
##    Hair  Eye   Sex        n
##    <chr> <chr> <chr>  <dbl>
##  1 Black Green Female     2
##  2 Blond Brown Male       3
##  3 Black Green Male       3
##  4 Blond Brown Female     4
##  5 Blond Hazel Male       5
##  6 Black Hazel Female     5
##  7 Blond Hazel Female     5
##  8 Red   Hazel Male       7
##  9 Red   Green Male       7
## 10 Red   Blue  Female     7
## # ... with 22 more rows

Mengurutkan berdasarkan banyaknya siswa dari nilai terbesar

# Sort rows by the count n, largest first.
arrange(HairEyeColor, desc(n))
## # A tibble: 32 x 4
##    Hair  Eye   Sex        n
##    <chr> <chr> <chr>  <dbl>
##  1 Brown Brown Female    66
##  2 Blond Blue  Female    64
##  3 Brown Brown Male      53
##  4 Brown Blue  Male      50
##  5 Black Brown Female    36
##  6 Brown Blue  Female    34
##  7 Black Brown Male      32
##  8 Blond Blue  Male      30
##  9 Brown Hazel Female    29
## 10 Brown Hazel Male      25
## # ... with 22 more rows

Melakukan filter pada data HairEyeColor sehingga diperoleh hanya data untuk Perempuan

# Keep only the rows where Sex is "Female".
filter(HairEyeColor, Sex == "Female")
## # A tibble: 16 x 4
##    Hair  Eye   Sex        n
##    <chr> <chr> <chr>  <dbl>
##  1 Black Brown Female    36
##  2 Brown Brown Female    66
##  3 Red   Brown Female    16
##  4 Blond Brown Female     4
##  5 Black Blue  Female     9
##  6 Brown Blue  Female    34
##  7 Red   Blue  Female     7
##  8 Blond Blue  Female    64
##  9 Black Hazel Female     5
## 10 Brown Hazel Female    29
## 11 Red   Hazel Female     7
## 12 Blond Hazel Female     5
## 13 Black Green Female     2
## 14 Brown Green Female    14
## 15 Red   Green Female     7
## 16 Blond Green Female     8

Melakukan filter pada data HairEyeColor sehingga diperoleh hanya data untuk Perempuan dengan mata biru

# Keep only the rows where Sex is "Female" and Eye is "Blue".
# Comma-separated conditions in filter() are combined with a logical AND.
HairEyeColor %>%
  filter(Sex == "Female", Eye == "Blue")
## # A tibble: 4 x 4
##   Hair  Eye   Sex        n
##   <chr> <chr> <chr>  <dbl>
## 1 Black Blue  Female     9
## 2 Brown Blue  Female    34
## 3 Red   Blue  Female     7
## 4 Blond Blue  Female    64

Melakukan filter pada data HairEyeColor sehingga diperoleh hanya data untuk siswa rambut hitam dengan mata biru

# Keep only the rows where Hair is "Black" and Eye is "Blue".
# Comma-separated conditions in filter() are combined with a logical AND.
HairEyeColor %>%
  filter(Hair == "Black", Eye == "Blue")
## # A tibble: 2 x 4
##   Hair  Eye   Sex        n
##   <chr> <chr> <chr>  <dbl>
## 1 Black Blue  Male      11
## 2 Black Blue  Female     9

Memilih subset kolom data (Sex, Eye, dan n)

# Keep only the Sex, Eye and n columns, in that order.
select(HairEyeColor, Sex, Eye, n)
## # A tibble: 32 x 3
##    Sex   Eye       n
##    <chr> <chr> <dbl>
##  1 Male  Brown    32
##  2 Male  Brown    53
##  3 Male  Brown    10
##  4 Male  Brown     3
##  5 Male  Blue     11
##  6 Male  Blue     50
##  7 Male  Blue     10
##  8 Male  Blue     30
##  9 Male  Hazel    10
## 10 Male  Hazel    25
## # ... with 22 more rows

Menghitung proporsi tiap kombinasi kategori terhadap total seluruh siswa (jumlah seluruh proporsi harus sama dengan 1)

# Add column p: each row's count as a share of the total of n.
a <- HairEyeColor %>%
  mutate(p = n / sum(n))
# Print the total of the shares.
sum(a[["p"]])
## [1] 1

Data Exploration

Mengetahui ukuran data

# Number of rows and columns.
HairEyeColor %>%
  dim()
## [1] 32  4

Pratinjau data

# Compact preview: one line per column with its type and first values.
HairEyeColor %>%
  glimpse()
## Rows: 32
## Columns: 4
## $ Hair <chr> "Black", "Brown", "Red", "Blond", "Black", "Brown", "Red", "Blond~
## $ Eye  <chr> "Brown", "Brown", "Brown", "Brown", "Blue", "Blue", "Blue", "Blue~
## $ Sex  <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", "~
## $ n    <dbl> 32, 53, 10, 3, 11, 50, 10, 30, 10, 25, 7, 5, 3, 15, 7, 8, 36, 66,~

Filtering data dengan hanya memilih data siswa perempuan

# Store the female-only rows in `color`, then print the row count and
# the first six rows.
color <- filter(HairEyeColor, Sex == "Female")
nrow(color)
## [1] 16
head(color, n = 6)
## # A tibble: 6 x 4
##   Hair  Eye   Sex        n
##   <chr> <chr> <chr>  <dbl>
## 1 Black Brown Female    36
## 2 Brown Brown Female    66
## 3 Red   Brown Female    16
## 4 Blond Brown Female     4
## 5 Black Blue  Female     9
## 6 Brown Blue  Female    34

Filtering data dengan hanya memilih data laki-laki yang jumlah orang pada tiap kombinasi karakteristik berada di antara 0 dan 10 (inklusif)

# Store in `color` the male rows whose count n lies in the closed range
# [0, 10], keeping only the Hair, Eye and n columns, then print the result.
# between() tests the range directly; the previous `n %in% 0:10` was a
# set-membership test against the integers 0..10 and would silently drop
# any non-integer value inside the range.
color <- HairEyeColor %>%
  filter(Sex == "Male", between(n, 0, 10)) %>%
  select(Hair, Eye, n)
color
## # A tibble: 9 x 3
##   Hair  Eye       n
##   <chr> <chr> <dbl>
## 1 Red   Brown    10
## 2 Blond Brown     3
## 3 Red   Blue     10
## 4 Black Hazel    10
## 5 Red   Hazel     7
## 6 Blond Hazel     5
## 7 Black Green     3
## 8 Red   Green     7
## 9 Blond Green     8

Mengganti nama kolom n menjadi banyak

# Show `color` with column n renamed to banyak (the result is not stored).
rename(color, banyak = n)
## # A tibble: 9 x 3
##   Hair  Eye   banyak
##   <chr> <chr>  <dbl>
## 1 Red   Brown     10
## 2 Blond Brown      3
## 3 Red   Blue      10
## 4 Black Hazel     10
## 5 Red   Hazel      7
## 6 Blond Hazel      5
## 7 Black Green      3
## 8 Red   Green      7
## 9 Blond Green      8