library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.5.2
library(ggplot2)
# Load the exercise data set from the working directory.
# NOTE(review): the printed table further down shows literal "NaN" entries in
# the character columns `Pengeluaran..Rp.` and `Kota`; is.na() reports them as
# not missing. Consider passing `na.strings` to read.csv() -- confirm intent.
df_outlieer <- read.csv(file = "LATIHAN OUTLIER.csv")
# Cell-by-cell logical matrix: TRUE where a value is missing.
is.na(x = df_outlieer)
## ID Nama Umur IPK Pengeluaran..Rp. Kota
## [1,] FALSE FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE FALSE
## [6,] FALSE FALSE TRUE FALSE FALSE FALSE
## [7,] FALSE FALSE FALSE TRUE FALSE FALSE
## [8,] FALSE FALSE FALSE FALSE FALSE FALSE
## [9,] FALSE FALSE FALSE FALSE FALSE FALSE
## [10,] FALSE FALSE FALSE FALSE FALSE FALSE
## [11,] FALSE FALSE FALSE FALSE FALSE FALSE
## [12,] FALSE FALSE FALSE FALSE FALSE FALSE
## [13,] FALSE FALSE FALSE FALSE FALSE FALSE
## [14,] FALSE FALSE FALSE FALSE FALSE FALSE
## [15,] FALSE FALSE FALSE FALSE FALSE FALSE
## [16,] FALSE FALSE FALSE FALSE FALSE FALSE
## [17,] FALSE FALSE FALSE FALSE FALSE FALSE
## [18,] FALSE FALSE FALSE FALSE FALSE FALSE
## [19,] FALSE FALSE FALSE TRUE FALSE FALSE
## [20,] FALSE FALSE FALSE FALSE FALSE FALSE
## [21,] FALSE FALSE FALSE FALSE FALSE FALSE
## [22,] FALSE FALSE FALSE FALSE FALSE FALSE
## [23,] FALSE FALSE FALSE FALSE FALSE FALSE
## [24,] FALSE FALSE FALSE FALSE FALSE FALSE
## [25,] FALSE FALSE FALSE FALSE FALSE FALSE
## [26,] FALSE FALSE FALSE FALSE FALSE FALSE
## [27,] FALSE FALSE FALSE FALSE FALSE FALSE
## [28,] FALSE FALSE FALSE FALSE FALSE FALSE
## [29,] FALSE FALSE FALSE FALSE FALSE FALSE
## [30,] FALSE FALSE FALSE FALSE FALSE FALSE
## [31,] FALSE FALSE FALSE FALSE FALSE FALSE
## [32,] FALSE FALSE FALSE FALSE FALSE FALSE
## [33,] FALSE FALSE FALSE FALSE FALSE FALSE
## [34,] FALSE FALSE FALSE FALSE FALSE FALSE
## [35,] FALSE FALSE FALSE FALSE FALSE FALSE
## [36,] FALSE FALSE FALSE FALSE FALSE FALSE
## [37,] FALSE FALSE FALSE FALSE FALSE FALSE
## [38,] FALSE FALSE FALSE FALSE FALSE FALSE
## [39,] FALSE FALSE FALSE FALSE FALSE FALSE
## [40,] FALSE FALSE FALSE FALSE FALSE FALSE
# Number of missing values per column, before any imputation.
df_outlieer |>
  is.na() |>
  colSums()
## ID Nama Umur IPK
## 0 0 1 2
## Pengeluaran..Rp. Kota
## 0 0
library(VIM)
## Warning: package 'VIM' was built under R version 4.5.2
## Loading required package: colorspace
## Warning: package 'colorspace' was built under R version 4.5.2
## Loading required package: grid
## VIM is ready to use.
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
# Plot the missing-value pattern with VIM::aggr(): `numbers = TRUE` annotates
# the plot with values and `prop = FALSE` shows counts instead of proportions.
aggr(df_outlieer, numbers = TRUE, prop = FALSE)

# The interactive help lookup for aggr was removed from the script: it starts
# the local help server as a side effect of every run. Run `?aggr` in the
# console when the documentation is needed.
# Median imputation for the two columns that contain missing values.
# The median is taken over the observed values only (na.rm = TRUE) and written
# into the NA positions; every other entry is left untouched.
df_outlieer$IPK <- replace(
  df_outlieer$IPK,
  is.na(df_outlieer$IPK),
  median(df_outlieer$IPK, na.rm = TRUE)
)
df_outlieer$Umur <- replace(
  df_outlieer$Umur,
  is.na(df_outlieer$Umur),
  median(df_outlieer$Umur, na.rm = TRUE)
)
# Show the whole data frame after imputation.
print(df_outlieer)
## ID Nama Umur IPK Pengeluaran..Rp. Kota
## 1 1 Andi 20 3.50 1.000.000 Jakarta
## 2 2 Budi 21 2.80 1.200.000 Surabaya
## 3 3 Citra 19 3.90 800 Bandung
## 4 4 Deni 22 3.20 1.500.000 Medan
## 5 5 Deni 22 3.20 1.500.000 Medan
## 6 6 Eka 20 3.00 900 Jogja
## 7 7 Farhan 20 3.25 1.100.000 Makassar
## 8 8 Gita 21 3.70 150.000.000 Jakarta
## 9 9 Hadi 20 3.40 1.000.000 NaN
## 10 10 Indah 19 3.60 850 Semarang
## 11 11 Jaka 45 3.10 2.000.000 Bandung
## 12 12 Kevin 20 0.50 950 Surabaya
## 13 13 Laras 21 3.80 NaN Jakarta
## 14 14 Maya 20 3.30 1.050.000 Medan
## 15 15 Niko 22 2.90 1.300.000 Jogja
## 16 16 Olivia 19 4.00 700 Bandung
## 17 17 Putri 20 3.50 1.000.000 Jakarta
## 18 18 Qori 21 3.20 1.150.000 Surabaya
## 19 19 Rian 22 3.25 1.400.000 Semarang
## 20 20 Sari 20 3.70 900 Makassar
## 21 21 Tono 20 3.50 1.000.000 Jakarta
## 22 22 Umar 21 3.00 1.200.000 Surabaya
## 23 23 Umar 21 3.00 1.200.000 Surabaya
## 24 24 Vina 19 3.90 800 Bandung
## 25 25 Wawan 22 3.20 1.500.000 Medan
## 26 26 Xena 20 3.00 900 Jogja
## 27 27 Yogi 20 1.20 1.100.000 Makassar
## 28 28 Zara 21 3.70 500 Jakarta
## 29 29 Andi 20 3.50 1.000.000 Jakarta
## 30 30 Budi 21 2.80 1.200.000 Surabaya
## 31 31 Citra 19 3.90 800 Bandung
## 32 32 Deni 22 3.20 1.500.000 Medan
## 33 33 Eka 20 3.00 900 Jogja
## 34 34 Farhan 20 3.10 1.100.000 Makassar
## 35 35 Gita 21 3.70 1.000.000 Jakarta
## 36 36 Hadi 20 3.40 1.000.000 Semarang
## 37 37 Indah 19 3.60 850 Semarang
## 38 38 Jaka 21 3.10 2.000.000 Bandung
## 39 39 Kevin 20 3.20 950 Surabaya
## 40 40 Laras 21 3.80 1.100.000 Jakarta
# Re-count missing values per column to confirm the imputation filled them.
df_outlieer |>
  is.na() |>
  colSums()
## ID Nama Umur IPK
## 0 0 0 0
## Pengeluaran..Rp. Kota
## 0 0
# IQR rule for IPK: values more than 1.5 * IQR below the first quartile or
# above the third quartile are flagged as outliers.
Q1 <- quantile(df_outlieer$IPK, 0.25)
Q3 <- quantile(df_outlieer$IPK, 0.75)
# This numeric `IQR` shares its name with the stats::IQR() function; the name
# is kept so that any later code reading `IQR` keeps working.
IQR <- Q3 - Q1
lower_bound <- Q1 - 1.5 * IQR
upper_bound <- Q3 + 1.5 * IQR
# Row-wise logical masks: `outliersa` marks low outliers, `outliers` high ones.
outliersa <- df_outlieer$IPK < lower_bound
outliers <- df_outlieer$IPK > upper_bound
# Number of IPK values below the lower fence.
sum(outliersa)
## [1] 2
# Number of IPK values above the upper fence, reported separately.
sum(outliers)
## [1] 0
# Boxplot of IPK; points drawn beyond the whiskers are the outlier candidates.
# The title names the plotted variable.
boxplot(df_outlieer$IPK, main = "Boxplot IPK", col = "red")

# Count rows that repeat an earlier row in every column. Because `ID` differs
# on each row of the printed table, this full-row comparison finds none.
sum(duplicated(df_outlieer))
## [1] 0
# Count repeated records while ignoring `ID`, so that entries identical in all
# other columns (e.g. rows 4/5 and 22/23 of the printed table) are detected.
# Output not captured here; re-run the script to obtain the count.
sum(duplicated(df_outlieer[, names(df_outlieer) != "ID", drop = FALSE]))
# Per-column summary of the data frame. Character columns only report
# length, class and mode, so no numeric statistics appear for them.
summary(object = df_outlieer)
## ID Nama Umur IPK
## Min. : 1.00 Length:40 Min. :19.00 Min. :0.500
## 1st Qu.:10.75 Class :character 1st Qu.:20.00 1st Qu.:3.075
## Median :20.50 Mode :character Median :20.00 Median :3.250
## Mean :20.50 Mean :21.05 Mean :3.240
## 3rd Qu.:30.25 3rd Qu.:21.00 3rd Qu.:3.625
## Max. :40.00 Max. :45.00 Max. :4.000
## Pengeluaran..Rp. Kota
## Length:40 Length:40
## Class :character Class :character
## Mode :character Mode :character
##
##
##