Email             :
RPubs            : https://rpubs.com/rizalandriana
Github           : https://github.com/rizalandriana
Jurusan          : Teknik Informatika
Address         : ARA Center, Matana University Tower
                         Jl. CBD Barat Kav, RT.1, Curug Sangereng, Kelapa Dua, Tangerang, Banten 15810.


1 Persiapan Data

1.1 Import Data

library(readr)
# Load the student survey into a data frame with base R's CSV reader.
# NOTE(review): the printed column name `ï..ID` suggests the file starts with a
# UTF-8 byte-order mark; fileEncoding = "UTF-8-BOM" may be needed -- confirm.
df <- read.csv(file = "students.csv")

1.2 Cek data hilang

# Count the missing values in every column. vapply() pins the result to exactly
# one integer per column, so the return type cannot silently change the way
# sapply()'s can when the input is empty or irregular.
vapply(df, function(column) sum(is.na(column)), integer(1))
##       ï..ID      Gender       Grade   Horoscope     Subject      IntExt 
##           0           0           0           0           0           0 
##     OptPest  ScreenTime       Sleep  PhysActive HrsHomework  SpendTime1 
##           0           0           0           1           0           0 
##  SpendTime2       Self1       Self2      Career  Superpower 
##           0           0           0           0           0

1.3 Hapus data hilang

# Discard every row that contains at least one missing value, then preview the
# first three rows that remain.
df <- na.omit(object = df)
head(x = df, n = 3L)
##   ï..ID Gender Grade Horoscope Subject    IntExt  OptPest ScreenTime Sleep
## 1     1   male     4   Scorpio    Math Extravert Optimist          1     7
## 2     2 female     4 Capricorn     Gym Extravert Optimist          1     8
## 3     3   male     4    Taurus    Math Introvert Optimist          4     9
##   PhysActive HrsHomework      SpendTime1 SpendTime2  Self1       Self2
## 1         10          10        baseball   relaxing active competitive
## 2          5           0 playing outside   swimming   kind      active
## 3         22           1     video games     soccer active    creative
##                         Career            Superpower
## 1 professional baseball player           sonic speed
## 2                      Teacher power to grant wishes
## 3   professional soccer player         powerful kick

1.4 Eksplorasi data kualitatif

1.4.1 Kategorikal Univariat

  • Frekuensi masing-masing kategori
# Number of observations in each gender category.
table(df[["Gender"]])
## 
## Don't Identify         female           male 
##              2             91             91
  • Proporsi kategori
# Share of observations in each gender category (counts divided by the total).
prop.table(x = table(df[["Gender"]]))
## 
## Don't Identify         female           male 
##     0.01086957     0.49456522     0.49456522

1.4.2 Kategorikal Bivariat

library(readr)
library(dplyr)
library(magrittr)
# Two-way contingency table: keep only the Gender and Horoscope columns and
# count how many rows fall into each combination.
Cat2 <- table(select(df, Gender, Horoscope))

Cat2
##                 Horoscope
## Gender           Aquarius Aries Cancer Capricorn Gemini Leo Libra Pisces
##   Don't Identify        0     1      0         0      0   0     1      0
##   female                2     9     12         6      7  11     5      9
##   male                  8    11      8         9      9   5     6      6
##                 Horoscope
## Gender           Sagittarius Scorpio Taurus Virgo
##   Don't Identify           0       0      0     0
##   female                   8       8      7     7
##   male                     4      10      8     7

1.4.3 Kategorikal Multivariat

# Three-way contingency table of Gender, Horoscope and Subject.
# ftable() lays the counts out as one flat, human-readable grid. Using table()
# instead gives the machine-friendly array, and prop.table() would turn the
# counts into proportions.
Cat3 <- ftable(select(df, Gender, Horoscope, Subject))

Cat3
##                            Subject   Art Gym History Math Science
## Gender         Horoscope                                         
## Don't Identify Aquarius            0   0   0       0    0       0
##                Aries               0   0   0       1    0       0
##                Cancer              0   0   0       0    0       0
##                Capricorn           0   0   0       0    0       0
##                Gemini              0   0   0       0    0       0
##                Leo                 0   0   0       0    0       0
##                Libra               0   1   0       0    0       0
##                Pisces              0   0   0       0    0       0
##                Sagittarius         0   0   0       0    0       0
##                Scorpio             0   0   0       0    0       0
##                Taurus              0   0   0       0    0       0
##                Virgo               0   0   0       0    0       0
## female         Aquarius            0   0   2       0    0       0
##                Aries               0   3   4       1    1       0
##                Cancer              0   4   5       0    1       2
##                Capricorn           0   2   3       0    1       0
##                Gemini              0   2   2       0    1       2
##                Leo                 0   2   3       1    3       2
##                Libra               0   1   0       0    3       1
##                Pisces              0   3   3       0    2       1
##                Sagittarius         0   1   2       0    2       3
##                Scorpio             0   3   1       0    2       2
##                Taurus              0   4   2       0    1       0
##                Virgo               0   0   1       1    2       3
## male           Aquarius            0   3   3       0    1       1
##                Aries               0   1   3       0    4       3
##                Cancer              0   0   6       1    1       0
##                Capricorn           1   1   6       0    1       0
##                Gemini              0   0   4       1    2       2
##                Leo                 0   3   1       0    0       1
##                Libra               0   1   4       0    1       0
##                Pisces              0   0   2       0    2       2
##                Sagittarius         0   0   1       0    3       0
##                Scorpio             0   1   6       0    1       2
##                Taurus              0   0   2       1    5       0
##                Virgo               0   1   3       1    2       0

1.5 Eksplorasi data kuantitatif

1.5.1 Univariat kontinu

  • Memilih kolom yang numeric
# Keep only the numeric columns for the quantitative analysis, then list them.
# NOTE(review): the ID column is numeric as well, so it is carried into Kuan and
# into the covariance/correlation matrices further down -- consider dropping it.
Kuan <- select_if(df, is.numeric)

names(Kuan)
## [1] "ï..ID"       "Grade"       "ScreenTime"  "Sleep"       "PhysActive" 
## [6] "HrsHomework"

1.5.1.1 Tendensi sentral

  • Rata-rata
# Arithmetic mean of the homework-hours column.
mean(x = Kuan[["HrsHomework"]])
## [1] 4.1875
  • Kuantil
# Minimum, quartiles and maximum of the homework-hours column.
quantile(x = Kuan[["HrsHomework"]])
##   0%  25%  50%  75% 100% 
##    0    1    3    6   35
  • Median
# Middle value of the homework-hours column.
median(x = Kuan[["HrsHomework"]])
## [1] 3
  • Mode
# base R's mode() reports the storage mode ("numeric"), not the most frequent
# value, so tabulate the hours and keep the value(s) with the highest count.
# Ties are all returned. The printed result must be refreshed by re-running
# the document.
local({
  hrs_counts <- table(Kuan$HrsHomework)
  as.numeric(names(hrs_counts)[hrs_counts == max(hrs_counts)])
})
## [1] "numeric"

1.5.1.2 Penskalaan

  • Variansi
# Sample variance of the homework-hours column.
var(x = Kuan[["HrsHomework"]])
## [1] 23.05891
  • Standar deviasi
# Sample standard deviation of the homework-hours column.
sd(x = Kuan[["HrsHomework"]])
## [1] 4.80197
  • Deviasi median absolut
# Median absolute deviation of the homework-hours column (default scaling).
mad(x = Kuan[["HrsHomework"]])
## [1] 2.9652
  • Jarak antar kuantil
# Interquartile range (75th minus 25th percentile) of the homework hours.
IQR(x = Kuan[["HrsHomework"]])
## [1] 5

1.5.1.3 Kecondongan

library(e1071)
# Skewness of the homework hours, using e1071's default estimator.
skewness(x = Kuan[["HrsHomework"]])
## [1] 2.606671

1.5.1.4 Kurtosis

# Kurtosis of the homework hours, using e1071's default estimator.
kurtosis(x = Kuan[["HrsHomework"]])
## [1] 10.2512

1.5.2 Bivariat kontinu

  • Kovarian
# Sample covariance between homework hours and sleep.
cov(x = Kuan[["HrsHomework"]], y = Kuan[["Sleep"]])
## [1] -0.3942964
  • Koefisien korelasi
# Correlation coefficient (default method) between homework hours and sleep.
cor(x = Kuan[["HrsHomework"]], y = Kuan[["Sleep"]])
## [1] -0.0534581
  • Z-score
# Standardise the homework hours: subtract the mean, then divide by the
# standard deviation, giving one z-score per row.
zscore <- with(Kuan, (HrsHomework - mean(HrsHomework)) / sd(HrsHomework))

1.5.3 Multivariat kontinu

  • Matriks kovarian sampel
# Sample covariance matrix across all numeric columns.
cov(x = Kuan)
##                   ï..ID      Grade ScreenTime      Sleep  PhysActive
## ï..ID       2867.736250  2.1336719   5.842317 -5.9110982  17.9871703
## Grade          2.133672  1.9398610   1.331299 -0.6939742  -3.1610834
## ScreenTime     5.842317  1.3312990   5.383872 -1.7120976  -4.8129009
## Sleep         -5.911098 -0.6939742  -1.712098  2.3592822   0.2690663
## PhysActive    17.987170 -3.1610834  -4.812901  0.2690663 139.4967926
## HrsHomework  -36.511270  0.7885929   2.431182 -0.3942964   6.5054645
##             HrsHomework
## ï..ID       -36.5112705
## Grade         0.7885929
## ScreenTime    2.4311817
## Sleep        -0.3942964
## PhysActive    6.5054645
## HrsHomework  23.0589139
  • Matriks korelasi sampel
# Sample correlation matrix across all numeric columns.
cor(x = Kuan)
##                   ï..ID       Grade  ScreenTime       Sleep  PhysActive
## ï..ID        1.00000000  0.02860703  0.04701843 -0.07186354  0.02843879
## Grade        0.02860703  1.00000000  0.41194850 -0.32439047 -0.19216245
## ScreenTime   0.04701843  0.41194850  1.00000000 -0.48038694 -0.17562140
## Sleep       -0.07186354 -0.32439047 -0.48038694  1.00000000  0.01483157
## PhysActive   0.02843879 -0.19216245 -0.17562140  0.01483157  1.00000000
## HrsHomework -0.14198351  0.11790932  0.21819792 -0.05345810  0.11470353
##             HrsHomework
## ï..ID        -0.1419835
## Grade         0.1179093
## ScreenTime    0.2181979
## Sleep        -0.0534581
## PhysActive    0.1147035
## HrsHomework   1.0000000

2 EDA secara ‘malas’

library(funModeling) 
library(tidyverse) 
library(Hmisc)
library(skimr)
# One-call exploratory overview of a data frame.
#
# Runs each summary/plot helper from the packages loaded above on `data`, in
# order, so a single call produces the whole overview.
#
# Args:
#   data: the data frame to explore.
#
# Returns:
#   The value of describe(data), the last expression evaluated.
basic_eda <- function(data) {
  glimpse(data)
  # R auto-prints values only at top level, so a bare skim(data) inside a
  # function body is computed and then silently discarded; print it explicitly.
  print(skim(data))
  df_status(data)
  freq(data)
  # NOTE(review): assumes profiling_num() only returns its table and does not
  # print it on its own -- confirm against the installed funModeling version.
  print(profiling_num(data))
  plot_num(data)
  describe(data)
}
basic_eda(df)

3 Membuat laporan EDA

library(DataExplorer)

# DataExplorer::create_report(df)
LS0tDQp0aXRsZTogIlR1Z2FzIDA1Ig0Kc3VidGl0bGU6ICJBbmFsaXNpcyBFa3NwbG9yYXNpIERhdGEiDQphdXRob3I6ICJSaXphbCBBbmRyaWFuYSAoMjAyMDU1MjAwNykiDQpkYXRlOiAiYHIgZm9ybWF0KFN5cy5EYXRlKCksICclQiAlZCwgJVknKWAiDQpvdXRwdXQ6IA0KICBodG1sX2RvY3VtZW50OiANCiAgICBodG1sX2RvY3VtZW50OiBudWxsDQogICAgY29kZV9mb2xkaW5nOiBoaWRlDQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZmxvYXQ6DQogICAgICBjb2xsYXBzZWQ6IHllcw0KICAgIG51bWJlcl9zZWN0aW9uczogeWVzDQogICAgY29kZV9kb3dubG9hZDogeWVzDQogICAgdGhlbWU6IHNhbmRzdG9uZQ0KICAgIGNzczogc3R5bGUxLmNzcw0KICAgIGhpZ2hsaWdodDogbW9ub2Nocm9tZQ0KLS0tDQoNCg0KPGltZyBzdHlsZT0iZmxvYXQ6IHJpZ2h0OyBtYXJnaW46IDBweCAxMDBweCAwcHggMHB4OyB3aWR0aDoyNSUiIHNyYz0ibWUuanBlZyIvPiANCg0KYGBge3IgbG9nbywgZWNobz1GQUxTRSxmaWcuYWxpZ249J2NlbnRlcicsIG91dC53aWR0aCA9ICczMCUnfQ0Ka25pdHI6OmluY2x1ZGVfZ3JhcGhpY3MoImxvZ28ucG5nIikNCmBgYA0KDQpFbWFpbCAmbmJzcDsmbmJzcDsmbmJzcDsmbmJzcDsmbmJzcDsgJm5ic3A7ICZuYnNwOyAmbmJzcDsmbmJzcDs6ICByaXphbC5hbmRyaWFuYUBzdHVkZW50Lm1hdGFuYXVuaXZlcnNpdHkuYWMuaWQgPGJyPg0KUlB1YnMgICZuYnNwOyZuYnNwOyZuYnNwOyZuYnNwOyZuYnNwOyAmbmJzcDsgJm5ic3A7ICZuYnNwOzogaHR0cHM6Ly9ycHVicy5jb20vcml6YWxhbmRyaWFuYSA8YnI+DQpHaXRodWIgICZuYnNwOyZuYnNwOyZuYnNwOyZuYnNwOyZuYnNwOyAmbmJzcDsgJm5ic3A7IDogaHR0cHM6Ly9naXRodWIuY29tL3JpemFsYW5kcmlhbmEgPGJyPg0KSnVydXNhbiAmbmJzcDsgJm5ic3A7ICZuYnNwOyAmbmJzcDsgJm5ic3A7OiBbVGVrbmlrIEluZm9ybWF0aWthXShodHRwczovL21hdGFuYXVuaXZlcnNpdHkuYWMuaWQvP2x5PWFjYWRlbWljJmM9dGkpIDxicj4NCkFkZHJlc3MgICZuYnNwOyAmbmJzcDsgJm5ic3A7ICZuYnNwOyA6IEFSQSBDZW50ZXIsIE1hdGFuYSBVbml2ZXJzaXR5IFRvd2VyIDxicj4NCiZuYnNwOyAmbmJzcDsgJm5ic3A7ICZuYnNwOyAmbmJzcDsgJm5ic3A7ICZuYnNwOyAmbmJzcDsgJm5ic3A7ICZuYnNwOyAmbmJzcDsgJm5ic3A7Jm5ic3A7IEpsLiBDQkQgQmFyYXQgS2F2LCBSVC4xLCBDdXJ1ZyBTYW5nZXJlbmcsIEtlbGFwYSBEdWEsIFRhbmdlcmFuZywgQmFudGVuIDE1ODEwLg0KDQoqKioqDQoNCiMgUGVyc2lhcGFuIERhdGENCg0KIyMgSW1wb3J0IERhdGENCg0KYGBge3J9DQpsaWJyYXJ5KHJlYWRyKQ0KZGYgPSByZWFkLmNzdigic3R1ZGVudHMuY3N2IikNCmBgYA0KDQojIyBDZWsgZGF0YSBoaWxhbmcNCg0KYGBge3J9DQpzYXBwbHkoZGYsIGZ1bmN0aW9uKHgpIHN1bShpcy5uYSh4KSkpDQpgYGANCg0KIyMgSGFwdXMgZGF0YSBoaWxhbmcNCg0KYGBge3J9DQpkZiA8
LSBuYS5vbWl0KGRmKQ0KaGVhZChkZiwzKQ0KYGBgDQoNCiMjIEVrc3Bsb3Jhc2kgZGF0YSBrdWFsaXRhdGlmDQoNCg0KIyMjIEthdGVnb3Jpa2FsIFVuaXZhcmlhdA0KDQoqIEZyZWt1ZW5zaSBtYXNpbmctbWFzaW5nIGthdGVnb3JpDQoNCmBgYHtyfQ0KdGFibGUoZGYkR2VuZGVyKSAjIEZyZWt1ZW5zaSBkYXJpIG1hc2luZy1tYXNpbmcga2F0ZWdvcmkNCmBgYA0KDQoqIFByb3BvcnNpIGthdGVnb3JpDQoNCmBgYHtyfQ0KcHJvcC50YWJsZSh0YWJsZShkZiRHZW5kZXIpKSAjIFByb3BvcnNpIGthdGVnb3JpDQpgYGANCg0KIyMjIEthdGVnb3Jpa2FsIEJpdmFyaWF0DQoNCmBgYHtyLCBtZXNzYWdlPUZBTFNFfQ0KbGlicmFyeShyZWFkcikNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5KG1hZ3JpdHRyKQ0KQ2F0MiA8LSBkZiAlPiUNCiAgc2VsZWN0KEdlbmRlciwgSG9yb3Njb3BlKSAlPiUNCiAgdGFibGUoKQ0KDQpDYXQyDQpgYGANCg0KIyMjIEthdGVnb3JpY2FsIE11bHRpdmFyaWF0ZQ0KDQpgYGB7cn0NCkNhdDMgPC0gZGYgJT4lDQogIHNlbGVjdChHZW5kZXIsIEhvcm9zY29wZSwgU3ViamVjdCkgJT4lDQogICMgdGFibGUoKSAgICAgICAjIHRhYmVsIHlhbmcgbXVkYWggZGliYWNhIG9sZWggbWVzaW4NCiAgIyBwcm9wLnRhYmxlKCkgICMgcHJvcG9yc2kgdGFiZWwNCiAgZnRhYmxlKCkgICAgICAgICMgdGFiZWwgeWFuZyBkYXBhdCBkaWJhY2Egb2xlaCBtYW51c2lhDQoNCkNhdDMNCmBgYA0KDQojIyBFa3NwbG9yYXNpIGRhdGEga3VhbnRpdGF0aWYNCg0KIyMjIFVuaXZhcmlhdCBrb250aW51DQoNCiogTWVtaWxpaCBrb2xvbSB5YW5nIG51bWVyaWMNCg0KYGBge3J9DQpLdWFuIDwtIGRmICU+JQ0KICBzZWxlY3RfaWYoaXMubnVtZXJpYykNCg0KbmFtZXMoS3VhbikNCmBgYA0KDQojIyMjIFRlbmRlbnNpIHNlbnRyYWwNCiogUmF0YSByYXRhDQoNCmBgYHtyfQ0KbWVhbihLdWFuJEhyc0hvbWV3b3JrKQ0KYGBgDQoNCiogS3VhbnRpbA0KDQpgYGB7cn0NCnF1YW50aWxlKEt1YW4kSHJzSG9tZXdvcmspDQpgYGANCg0KKiBNZWRpYW4NCmBgYHtyfQ0KbWVkaWFuKEt1YW4kSHJzSG9tZXdvcmspDQpgYGANCg0KKiBNb2RlDQpgYGB7cn0NCm1vZGUoS3VhbiRIcnNIb21ld29yaykNCmBgYA0KDQojIyMjIFBlbnNrYWxhYW4NCg0KKiBWYXJpYW5zaQ0KDQpgYGB7cn0NCnZhcihLdWFuJEhyc0hvbWV3b3JrKQ0KYGBgDQoNCiogU3RhbmRhciBkZXZpYXNpDQoNCmBgYHtyfQ0Kc2QoS3VhbiRIcnNIb21ld29yaykNCmBgYA0KDQoqIERldmlhc2kgbWVkaWFuIGFic29sdXQNCg0KYGBge3J9DQptYWQoS3VhbiRIcnNIb21ld29yaykNCmBgYA0KDQoqIEphcmFrIGFudGFyIGt1YW50aWwNCg0KYGBge3J9DQpJUVIoS3VhbiRIcnNIb21ld29yaykNCmBgYA0KDQojIyMjIEtlY29uZG9uZ2FuDQpgYGB7ciwgbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkoZTEwNzEpDQpza2V3bmVzcyhLdWFuJEhyc0hvbWV3b3JrKQ0KYGBgDQoNCiMjIyMgS3VydG9zaXMNCmBgYHty
fQ0Ka3VydG9zaXMoS3VhbiRIcnNIb21ld29yaykNCmBgYA0KDQojIyMgQml2YXJpYXQga29udGludQ0KDQoqIEtvdmFyaWFuDQoNCmBgYHtyfQ0KY292KEt1YW4kSHJzSG9tZXdvcmssIEt1YW4kU2xlZXApDQpgYGANCg0KKiBLb2VmaXNpZW4ga29yZWxhc2kNCg0KYGBge3J9DQpjb3IoS3VhbiRIcnNIb21ld29yaywgS3VhbiRTbGVlcCkNCmBgYA0KDQoqIFotc2NvcmUNCg0KYGBge3J9DQp6c2NvcmUgPC0gKEt1YW4kSHJzSG9tZXdvcmsgLSBtZWFuKEt1YW4kSHJzSG9tZXdvcmspKSAvIHNkKEt1YW4kSHJzSG9tZXdvcmspDQpgYGANCg0KIyMjIE11bHRpdmFyaWF0IGtvbnRpbnUNCg0KKiBNYXRyaWtzIGtvdmFyaWFuIHNhbXBlbA0KDQpgYGB7cn0NCmNvdihLdWFuKQ0KYGBgDQoNCiogTWF0cmlrcyBrb3JlbGFzaSBzYW1wZWwNCg0KYGBge3J9DQpjb3IoS3VhbikNCmBgYA0KDQojIEVEQSBzZWNhcmEgJ21hbGFzJw0KDQpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgZXZhbD1GQUxTRX0NCmxpYnJhcnkoZnVuTW9kZWxpbmcpIA0KbGlicmFyeSh0aWR5dmVyc2UpIA0KbGlicmFyeShIbWlzYykNCmxpYnJhcnkoc2tpbXIpDQpiYXNpY19lZGEgPC0gZnVuY3Rpb24oZGF0YSkNCnsNCiAgZ2xpbXBzZShkYXRhKQ0KICBza2ltKGRhdGEpDQogIGRmX3N0YXR1cyhkYXRhKQ0KICBmcmVxKGRhdGEpIA0KICBwcm9maWxpbmdfbnVtKGRhdGEpDQogIHBsb3RfbnVtKGRhdGEpDQogIGRlc2NyaWJlKGRhdGEpDQp9DQpiYXNpY19lZGEoZGYpDQpgYGANCg0KIyBNZW1idWF0IGxhcG9yYW4gRURBDQoNCmBgYHtyLCBtZXNzYWdlPUZBTFNFfQ0KbGlicmFyeShEYXRhRXhwbG9yZXIpDQoNCiMgRGF0YUV4cGxvcmVyOjpjcmVhdGVfcmVwb3J0KGRmKQ0KYGBgDQoNCiMgUmVmZXJlbnNpDQoNCiogW01hdGVyaSBNZXRvZGUgU3RhdGlzdGlrYSBQZXJ0ZW11YW4gNV0oaHR0cHM6Ly9ib29rZG93bi5vcmcvQmFrdGlTaXJlZ2FyL2RhdGEtc2NpZW5jZS1mb3ItYmVnaW5uZXJzL0VEQS5odG1sI3F1YW50aXRhdGl2ZSkNCg0KDQoNCg0K