Email : rizal.andriana@student.matanauniversity.ac.id
RPubs : https://rpubs.com/rizalandriana
Github : https://github.com/rizalandriana
Jurusan : Teknik Informatika
Address : ARA Center, Matana University Tower
Jl. CBD Barat Kav, RT.1, Curug Sangereng, Kelapa Dua, Tangerang, Banten 15810.
library(readr)
# Load the student survey data from the working directory.
# NOTE(review): the `ï..ID` column name in the recorded output looks like a
# UTF-8 byte-order mark fused into the first header; passing
# `fileEncoding = "UTF-8-BOM"` to read.csv() would likely yield a clean `ID`
# column -- confirm against the raw file before changing it.
df <- read.csv("students.csv")

# Count the missing values in each column (always an integer per column).
vapply(df, function(x) sum(is.na(x)), integer(1))
## ï..ID Gender Grade Horoscope Subject IntExt
## 0 0 0 0 0 0
## OptPest ScreenTime Sleep PhysActive HrsHomework SpendTime1
## 0 0 0 1 0 0
## SpendTime2 Self1 Self2 Career Superpower
## 0 0 0 0 0
# Keep only the rows that have no missing value in any column.
df <- na.omit(object = df)

# Preview the first three remaining rows.
head(x = df, n = 3L)
## ï..ID Gender Grade Horoscope Subject IntExt OptPest ScreenTime Sleep
## 1 1 male 4 Scorpio Math Extravert Optimist 1 7
## 2 2 female 4 Capricorn Gym Extravert Optimist 1 8
## 3 3 male 4 Taurus Math Introvert Optimist 4 9
## PhysActive HrsHomework SpendTime1 SpendTime2 Self1 Self2
## 1 10 10 baseball relaxing active competitive
## 2 5 0 playing outside swimming kind active
## 3 22 1 video games soccer active creative
## Career Superpower
## 1 professional baseball player sonic speed
## 2 Teacher power to grant wishes
## 3 professional soccer player powerful kick
# Frequency of each Gender category.
table(df[["Gender"]])
##
## Don't Identify female male
## 2 91 91

# Proportion of each Gender category.
prop.table(table(df[["Gender"]]))
##
## Don't Identify female male
## 0.01086957 0.49456522 0.49456522
library(readr)
library(dplyr)
library(magrittr)
# Two-way contingency table built from the Gender and Horoscope columns.
Cat2 <- table(select(df, Gender, Horoscope))
Cat2
## Horoscope
## Gender Aquarius Aries Cancer Capricorn Gemini Leo Libra Pisces
## Don't Identify 0 1 0 0 0 0 1 0
## female 2 9 12 6 7 11 5 9
## male 8 11 8 9 9 5 6 6
## Horoscope
## Gender Sagittarius Scorpio Taurus Virgo
## Don't Identify 0 0 0 0
## female 8 8 7 7
## male 4 10 8 7
# Three-way frequency table of Gender, Horoscope and Subject.
# ftable() lays it out flat so it is easy for people to read; alternatives
# kept for reference:
#   table()      -- table that is easy for machines to read
#   prop.table() -- table of proportions
Cat3 <- ftable(select(df, Gender, Horoscope, Subject))
Cat3
## Subject Art Gym History Math Science
## Gender Horoscope
## Don't Identify Aquarius 0 0 0 0 0 0
## Aries 0 0 0 1 0 0
## Cancer 0 0 0 0 0 0
## Capricorn 0 0 0 0 0 0
## Gemini 0 0 0 0 0 0
## Leo 0 0 0 0 0 0
## Libra 0 1 0 0 0 0
## Pisces 0 0 0 0 0 0
## Sagittarius 0 0 0 0 0 0
## Scorpio 0 0 0 0 0 0
## Taurus 0 0 0 0 0 0
## Virgo 0 0 0 0 0 0
## female Aquarius 0 0 2 0 0 0
## Aries 0 3 4 1 1 0
## Cancer 0 4 5 0 1 2
## Capricorn 0 2 3 0 1 0
## Gemini 0 2 2 0 1 2
## Leo 0 2 3 1 3 2
## Libra 0 1 0 0 3 1
## Pisces 0 3 3 0 2 1
## Sagittarius 0 1 2 0 2 3
## Scorpio 0 3 1 0 2 2
## Taurus 0 4 2 0 1 0
## Virgo 0 0 1 1 2 3
## male Aquarius 0 3 3 0 1 1
## Aries 0 1 3 0 4 3
## Cancer 0 0 6 1 1 0
## Capricorn 1 1 6 0 1 0
## Gemini 0 0 4 1 2 2
## Leo 0 3 1 0 0 1
## Libra 0 1 4 0 1 0
## Pisces 0 0 2 0 2 2
## Sagittarius 0 0 1 0 3 0
## Scorpio 0 1 6 0 1 2
## Taurus 0 0 2 1 5 0
## Virgo 0 1 3 1 2 0
# Keep only the numeric columns for the quantitative summaries below.
Kuan <- select_if(df, is.numeric)
colnames(Kuan)
## [1] "ï..ID" "Grade" "ScreenTime" "Sleep" "PhysActive"
## [6] "HrsHomework"
# ---- Central tendency of HrsHomework ----
mean(Kuan$HrsHomework)
## [1] 4.1875
quantile(Kuan$HrsHomework)
## 0% 25% 50% 75% 100%
## 0 1 3 6 35
median(Kuan$HrsHomework)
## [1] 3

# Statistical mode (most frequent value; all ties are returned).
# Base R's mode() reports the storage mode of an object -- it printed
# "numeric" here -- so the most frequent value is computed explicitly.
# local() keeps the helper variables out of the global environment.
local({
  values <- unique(Kuan$HrsHomework)
  counts <- tabulate(match(Kuan$HrsHomework, values))
  values[counts == max(counts)]
})

# ---- Dispersion of HrsHomework ----
var(Kuan$HrsHomework)
## [1] 23.05891
sd(Kuan$HrsHomework)
## [1] 4.80197
mad(Kuan$HrsHomework)
## [1] 2.9652
IQR(Kuan$HrsHomework)
## [1] 5

# ---- Shape of the distribution (skewness() and kurtosis() from e1071) ----
library(e1071)
skewness(Kuan$HrsHomework)
## [1] 2.606671
kurtosis(Kuan$HrsHomework)
## [1] 10.2512

# ---- Association between HrsHomework and Sleep ----
cov(Kuan$HrsHomework, Kuan$Sleep)
## [1] -0.3942964
cor(Kuan$HrsHomework, Kuan$Sleep)
## [1] -0.0534581
# Standardize HrsHomework: subtract its mean and divide by its standard
# deviation. (This assignment was fused with the cov() call on one line,
# which R cannot parse.)
zscore <- (Kuan$HrsHomework - mean(Kuan$HrsHomework)) / sd(Kuan$HrsHomework)

# Covariance matrix of every numeric column.
cov(Kuan)
## ï..ID Grade ScreenTime Sleep PhysActive
## ï..ID 2867.736250 2.1336719 5.842317 -5.9110982 17.9871703
## Grade 2.133672 1.9398610 1.331299 -0.6939742 -3.1610834
## ScreenTime 5.842317 1.3312990 5.383872 -1.7120976 -4.8129009
## Sleep -5.911098 -0.6939742 -1.712098 2.3592822 0.2690663
## PhysActive 17.987170 -3.1610834 -4.812901 0.2690663 139.4967926
## HrsHomework -36.511270 0.7885929 2.431182 -0.3942964 6.5054645
## HrsHomework
## ï..ID -36.5112705
## Grade 0.7885929
## ScreenTime 2.4311817
## Sleep -0.3942964
## PhysActive 6.5054645
## HrsHomework 23.0589139

# Correlation matrix of every numeric column.
cor(Kuan)
## ï..ID Grade ScreenTime Sleep PhysActive
## ï..ID 1.00000000 0.02860703 0.04701843 -0.07186354 0.02843879
## Grade 0.02860703 1.00000000 0.41194850 -0.32439047 -0.19216245
## ScreenTime 0.04701843 0.41194850 1.00000000 -0.48038694 -0.17562140
## Sleep -0.07186354 -0.32439047 -0.48038694 1.00000000 0.01483157
## PhysActive 0.02843879 -0.19216245 -0.17562140 0.01483157 1.00000000
## HrsHomework -0.14198351 0.11790932 0.21819792 -0.05345810 0.11470353
## HrsHomework
## ï..ID -0.1419835
## Grade 0.1179093
## ScreenTime 0.2181979
## Sleep -0.0534581
## PhysActive 0.1147035
## HrsHomework 1.0000000
library(funModeling)
library(tidyverse)
library(Hmisc)
library(skimr)
#' Run a quick exploratory data analysis on a data set
#'
#' Calls, in order, `glimpse()`, `skim()`, `df_status()`, `freq()`,
#' `profiling_num()`, `plot_num()` and `describe()` on `data`.
#'
#' @param data The data set to explore; it is passed unchanged to every step.
#' @return The value of `describe(data)`, the last expression evaluated.
basic_eda <- function(data) {
  glimpse(data)
  # Values are not auto-printed inside a function body, so a result that is
  # only returned by its callee would be silently discarded; print it.
  print(skim(data))
  df_status(data)
  freq(data)
  # NOTE(review): assumes profiling_num() returns its table without printing
  # it -- confirm against the installed funModeling version.
  print(profiling_num(data))
  plot_num(data)
  describe(data)
}
# Run the exploratory summary on the survey data. (This call was fused with
# the library() call on one line, which R cannot parse.)
basic_eda(df)

library(DataExplorer)
# Optional: build DataExplorer's report for `df`.
# DataExplorer::create_report(df)